Spaces:

subhrajit-mohanty
/

rag_api

Running

App Files Community

SUBHRAJIT MOHANTY committed on Jul 9, 2025

Commit

bcc14d5

1 Parent(s): 347dbd1

Groq SDK is replaced with the OpenAI SDK (pointed at Groq's OpenAI-compatible endpoint)

Browse files

Files changed (2) hide show

app.py +100 -58
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import os
 from contextlib import asynccontextmanager
 # Third-party imports
-from groq import AsyncGroq
 from qdrant_client import AsyncQdrantClient
 from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
 from sentence_transformers import SentenceTransformer
@@ -49,6 +49,7 @@ class ChatCompletionChunk(BaseModel):
 # Configuration
 class Config:
     GROQ_API_KEY = os.getenv("GROQ_API_KEY")
     QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
     QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
     COLLECTION_NAME = os.getenv("COLLECTION_NAME", "documents")
@@ -60,7 +61,7 @@ class Config:
 class ApplicationState:
     """Application state container"""
     def __init__(self):
-        self.groq_client = None
         self.qdrant_client = None
         self.embedding_service = None
@@ -205,6 +206,9 @@ class RAGService:
                 print("Error: Embedding service is not initialized")
                 return []
             # Get query embedding - all-MiniLM works well without special prefixes
             query_embedding = await app_state.embedding_service.get_query_embedding(query)
@@ -231,6 +235,31 @@ class RAGService:
             print(f"Error retrieving chunks: {e}")
             return []
     @staticmethod
     def build_context_prompt(query: str, chunks: List[str]) -> str:
         """Build a context-aware prompt with retrieved chunks"""
@@ -278,19 +307,20 @@ async def health_check():
     return {
         "status": "healthy" if app_state.embedding_service is not None else "unhealthy",
-        "groq": "connected" if app_state.groq_client else "not configured",
         "qdrant": qdrant_status,
         "embedding_service": embedding_health,
         "collection": Config.COLLECTION_NAME,
-        "embedding_model": Config.EMBEDDING_MODEL
     }
 @app.post("/v1/chat/completions")
 async def chat_completions(request: ChatCompletionRequest):
     """OpenAI-compatible chat completions endpoint with RAG"""
-    if not app_state.groq_client:
-        raise HTTPException(status_code=500, detail="Groq client not initialized")
     try:
         # Get the last user message for retrieval
@@ -312,16 +342,16 @@ async def chat_completions(request: ChatCompletionRequest):
         else:
             enhanced_messages = request.messages
-        # Convert to Groq format
-        groq_messages = [{"role": msg.role, "content": msg.content} for msg in enhanced_messages]
         if request.stream:
             return StreamingResponse(
-                stream_chat_completion(groq_messages, request),
                 media_type="text/event-stream"
             )
         else:
-            return await create_chat_completion(groq_messages, request)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
@@ -329,7 +359,7 @@ async def chat_completions(request: ChatCompletionRequest):
 async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
     """Create a non-streaming chat completion"""
     try:
-        response = await app_state.groq_client.chat.completions.create(
             model=request.model,
             messages=messages,
             max_tokens=request.max_tokens,
@@ -338,36 +368,33 @@ async def create_chat_completion(messages: List[Dict], request: ChatCompletionRe
             stream=False
         )
-        # Convert Groq response to OpenAI format
         return ChatCompletionResponse(
-            id=f"chatcmpl-{uuid.uuid4().hex}",
-            created=int(datetime.now().timestamp()),
-            model=request.model,
             choices=[{
-                "index": 0,
                 "message": {
-                    "role": "assistant",
-                    "content": response.choices[0].message.content
                 },
-                "finish_reason": response.choices[0].finish_reason
-            }],
             usage={
                 "prompt_tokens": response.usage.prompt_tokens,
                 "completion_tokens": response.usage.completion_tokens,
                 "total_tokens": response.usage.total_tokens
-            }
         )
     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error calling Groq API: {str(e)}")
 async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
     """Stream chat completion responses"""
     try:
-        completion_id = f"chatcmpl-{uuid.uuid4().hex}"
-        created = int(datetime.now().timestamp())
-        stream = await app_state.groq_client.chat.completions.create(
             model=request.model,
             messages=messages,
             max_tokens=request.max_tokens,
@@ -377,38 +404,26 @@ async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRe
         )
         async for chunk in stream:
-            if chunk.choices and chunk.choices[0].delta:
-                delta = chunk.choices[0].delta
-                chunk_response = ChatCompletionChunk(
-                    id=completion_id,
-                    created=created,
-                    model=request.model,
-                    choices=[{
-                        "index": 0,
-                        "delta": {
-                            "role": delta.role if hasattr(delta, 'role') and delta.role else None,
-                            "content": delta.content if hasattr(delta, 'content') else None
-                        },
-                        "finish_reason": chunk.choices[0].finish_reason
-                    }]
-                )
-                yield f"data: {chunk_response.model_dump_json()}\n\n"
         # Send final chunk
-        final_chunk = ChatCompletionChunk(
-            id=completion_id,
-            created=created,
-            model=request.model,
-            choices=[{
-                "index": 0,
-                "delta": {},
-                "finish_reason": "stop"
-            }]
-        )
-        yield f"data: {final_chunk.model_dump_json()}\n\n"
         yield "data: [DONE]\n\n"
     except Exception as e:
@@ -429,6 +444,9 @@ async def add_document(content: str, metadata: Optional[Dict] = None):
         if app_state.embedding_service is None:
             raise HTTPException(status_code=500, detail="Embedding service is not initialized")
         # Generate embedding for document
         embedding = await app_state.embedding_service.get_document_embedding(content)
@@ -462,6 +480,9 @@ async def batch_add_documents(documents: List[Dict[str, Any]]):
         if app_state.embedding_service is None:
             raise HTTPException(status_code=500, detail="Embedding service is not initialized")
         # Extract texts and metadata
         texts = [doc.get("content", "") for doc in documents]
         metadatas = [doc.get("metadata", {}) for doc in documents]
@@ -507,6 +528,22 @@ async def create_collection():
         from qdrant_client.models import VectorParams, Distance
         await app_state.qdrant_client.create_collection(
             collection_name=Config.COLLECTION_NAME,
             vectors_config=VectorParams(
@@ -518,7 +555,8 @@ async def create_collection():
         return {
             "message": f"Collection '{Config.COLLECTION_NAME}' created successfully",
             "vector_size": app_state.embedding_service.dimension,
-            "distance": "cosine"
         }
     except Exception as e:
@@ -531,11 +569,15 @@ async def get_collection_info():
         if app_state.qdrant_client is None:
             raise HTTPException(status_code=500, detail="Qdrant client is not initialized")
         collection_info = await app_state.qdrant_client.get_collection(Config.COLLECTION_NAME)
         return {
             "name": Config.COLLECTION_NAME,
             "vectors_count": collection_info.vectors_count,
-            "status": collection_info.status
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error getting collection info: {str(e)}")

 from contextlib import asynccontextmanager
 # Third-party imports
+from openai import AsyncOpenAI
 from qdrant_client import AsyncQdrantClient
 from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue
 from sentence_transformers import SentenceTransformer
 # Configuration
 class Config:
     GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+    GROQ_BASE_URL = os.getenv("GROQ_BASE_URL", "https://api.groq.com/openai/v1")
     QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
     QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
     COLLECTION_NAME = os.getenv("COLLECTION_NAME", "documents")
 class ApplicationState:
     """Application state container"""
     def __init__(self):
+        self.openai_client = None
         self.qdrant_client = None
         self.embedding_service = None
                 print("Error: Embedding service is not initialized")
                 return []
+            # Auto-create collection if it doesn't exist
+            await RAGService._ensure_collection_exists()
             # Get query embedding - all-MiniLM works well without special prefixes
             query_embedding = await app_state.embedding_service.get_query_embedding(query)
             print(f"Error retrieving chunks: {e}")
             return []
+    @staticmethod
+    async def _ensure_collection_exists():
+        """Ensure the collection exists, create if it doesn't"""
+        try:
+            # Check if collection exists
+            collections = await app_state.qdrant_client.get_collections()
+            collection_names = [c.name for c in collections.collections]
+            if Config.COLLECTION_NAME not in collection_names:
+                print(f"Creating collection '{Config.COLLECTION_NAME}' on-demand...")
+                from qdrant_client.models import VectorParams, Distance
+                await app_state.qdrant_client.create_collection(
+                    collection_name=Config.COLLECTION_NAME,
+                    vectors_config=VectorParams(
+                        size=app_state.embedding_service.dimension,
+                        distance=Distance.COSINE
+                    )
+                )
+                print(f"✓ Collection '{Config.COLLECTION_NAME}' created successfully!")
+        except Exception as e:
+            print(f"Warning: Could not ensure collection exists: {e}")
+            # Continue anyway - the operation might still work
     @staticmethod
     def build_context_prompt(query: str, chunks: List[str]) -> str:
         """Build a context-aware prompt with retrieved chunks"""
     return {
         "status": "healthy" if app_state.embedding_service is not None else "unhealthy",
+        "openai_client": "connected" if app_state.openai_client else "not configured",
         "qdrant": qdrant_status,
         "embedding_service": embedding_health,
         "collection": Config.COLLECTION_NAME,
+        "embedding_model": Config.EMBEDDING_MODEL,
+        "groq_endpoint": Config.GROQ_BASE_URL
     }
 @app.post("/v1/chat/completions")
 async def chat_completions(request: ChatCompletionRequest):
     """OpenAI-compatible chat completions endpoint with RAG"""
+    if not app_state.openai_client:
+        raise HTTPException(status_code=500, detail="OpenAI client not initialized")
     try:
         # Get the last user message for retrieval
         else:
             enhanced_messages = request.messages
+        # Convert to OpenAI format
+        openai_messages = [{"role": msg.role, "content": msg.content} for msg in enhanced_messages]
         if request.stream:
             return StreamingResponse(
+                stream_chat_completion(openai_messages, request),
                 media_type="text/event-stream"
             )
         else:
+            return await create_chat_completion(openai_messages, request)
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
 async def create_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> ChatCompletionResponse:
     """Create a non-streaming chat completion"""
     try:
+        response = await app_state.openai_client.chat.completions.create(
             model=request.model,
             messages=messages,
             max_tokens=request.max_tokens,
             stream=False
         )
+        # Convert response to OpenAI format (already compatible)
         return ChatCompletionResponse(
+            id=response.id,
+            created=response.created,
+            model=response.model,
             choices=[{
+                "index": choice.index,
                 "message": {
+                    "role": choice.message.role,
+                    "content": choice.message.content
                 },
+                "finish_reason": choice.finish_reason
+            } for choice in response.choices],
             usage={
                 "prompt_tokens": response.usage.prompt_tokens,
                 "completion_tokens": response.usage.completion_tokens,
                 "total_tokens": response.usage.total_tokens
+            } if response.usage else None
         )
     except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error calling OpenAI API: {str(e)}")
 async def stream_chat_completion(messages: List[Dict], request: ChatCompletionRequest) -> AsyncGenerator[str, None]:
     """Stream chat completion responses"""
     try:
+        stream = await app_state.openai_client.chat.completions.create(
             model=request.model,
             messages=messages,
             max_tokens=request.max_tokens,
         )
         async for chunk in stream:
+            if chunk.choices and len(chunk.choices) > 0:
+                choice = chunk.choices[0]
+                if choice.delta:
+                    chunk_response = ChatCompletionChunk(
+                        id=chunk.id,
+                        created=chunk.created,
+                        model=chunk.model,
+                        choices=[{
+                            "index": choice.index,
+                            "delta": {
+                                "role": choice.delta.role if choice.delta.role else None,
+                                "content": choice.delta.content if choice.delta.content else None
+                            },
+                            "finish_reason": choice.finish_reason
+                        }]
+                    )
+                    yield f"data: {chunk_response.model_dump_json()}\n\n"
         # Send final chunk
         yield "data: [DONE]\n\n"
     except Exception as e:
         if app_state.embedding_service is None:
             raise HTTPException(status_code=500, detail="Embedding service is not initialized")
+        # Auto-create collection if it doesn't exist
+        await RAGService._ensure_collection_exists()
         # Generate embedding for document
         embedding = await app_state.embedding_service.get_document_embedding(content)
         if app_state.embedding_service is None:
             raise HTTPException(status_code=500, detail="Embedding service is not initialized")
+        # Auto-create collection if it doesn't exist
+        await RAGService._ensure_collection_exists()
         # Extract texts and metadata
         texts = [doc.get("content", "") for doc in documents]
         metadatas = [doc.get("metadata", {}) for doc in documents]
         from qdrant_client.models import VectorParams, Distance
+        # Check if collection already exists
+        try:
+            collections = await app_state.qdrant_client.get_collections()
+            collection_names = [c.name for c in collections.collections]
+            if Config.COLLECTION_NAME in collection_names:
+                return {
+                    "message": f"Collection '{Config.COLLECTION_NAME}' already exists",
+                    "vector_size": app_state.embedding_service.dimension,
+                    "distance": "cosine",
+                    "status": "exists"
+                }
+        except Exception as e:
+            print(f"Warning: Could not check existing collections: {e}")
+        # Create the collection
         await app_state.qdrant_client.create_collection(
             collection_name=Config.COLLECTION_NAME,
             vectors_config=VectorParams(
         return {
             "message": f"Collection '{Config.COLLECTION_NAME}' created successfully",
             "vector_size": app_state.embedding_service.dimension,
+            "distance": "cosine",
+            "status": "created"
         }
     except Exception as e:
         if app_state.qdrant_client is None:
             raise HTTPException(status_code=500, detail="Qdrant client is not initialized")
+        # Auto-create collection if it doesn't exist
+        await RAGService._ensure_collection_exists()
         collection_info = await app_state.qdrant_client.get_collection(Config.COLLECTION_NAME)
         return {
             "name": Config.COLLECTION_NAME,
             "vectors_count": collection_info.vectors_count,
+            "status": collection_info.status,
+            "vector_size": app_state.embedding_service.dimension if app_state.embedding_service else "unknown"
         }
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"Error getting collection info: {str(e)}")

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
 fastapi==0.104.1
 uvicorn[standard]==0.24.0
-groq==0.4.1
 qdrant-client==1.7.0
 sentence-transformers==2.2.2
 torch==2.1.1

 fastapi==0.104.1
 uvicorn[standard]==0.24.0
+openai==1.3.7
 qdrant-client==1.7.0
 sentence-transformers==2.2.2
 torch==2.1.1