Spaces: Sleeping

suhail committed
Commit · b2f2d4d · Parent(s): c4a0718

final: switch to OpenRouter for chat & embeddings (low cost + full RAG working)

Files changed:
- .env +16 -4
- app/__pycache__/main.cpython-313.pyc +0 -0
- app/core/__pycache__/config.cpython-313.pyc +0 -0
- app/core/config.py +1 -1
- app/ingestion/embedder.py +164 -54
- app/main.py +3 -1
- app/rag/__pycache__/generator.cpython-313.pyc +0 -0
- app/rag/__pycache__/retriever.cpython-313.pyc +0 -0
- app/rag/generator.py +171 -48
- app/rag/retriever.py +27 -1
- app/services/__pycache__/agent_service.cpython-313.pyc +0 -0
- app/services/agent_service.py +6 -3
- app/utils/__pycache__/embeddings.cpython-313.pyc +0 -0
- app/utils/embeddings.py +5 -8
- requirements.txt +3 -1
.env
CHANGED
@@ -14,15 +14,27 @@ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.K6s2NFJR
 
 QDRANT_COLLECTION_NAME=test-clustor
 
+
+OPENAI_API_KEY=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
+
+# OPENAI_API_KEY=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
 
 BOOK_SOURCE_DIR=../website/docs/modules
 INGESTION_CHUNK_SIZE=400
 INGESTION_OVERLAP=50
-OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
+# OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
 
 
 # .env
-VITE_API_URL=
+VITE_API_URL=https://sk3078-rag-chatbot.hf.space/agent/query
 # Add other vars if needed
-VITE_APP_NAME=My Book Agent
+VITE_APP_NAME=My Book Agent
+
+
+OPEN_ROUTER_API_KEY=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
+
+
+COHERE_API_KEY=bS3Uu3AlGJ98UG8lzfAUOCMAz3bm1InmC2V8Kyud
+
+COHERE_EMBEDDING_MODEL=embed-english-v3.0
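Note that the same OpenRouter key (sk-or-v1-…) is stored under both OPENAI_API_KEY and OPEN_ROUTER_API_KEY, so existing code that reads OPENAI_API_KEY picks up the OpenRouter credentials without any renaming. A minimal startup check for the variables this commit depends on (a sketch; the variable names are taken from this .env, nothing else is assumed):

import os
from dotenv import load_dotenv

load_dotenv()

# Fail fast if the keys this commit relies on are missing from .env.
required = ["OPENAI_API_KEY", "QDRANT_COLLECTION_NAME", "COHERE_API_KEY"]
missing = [name for name in required if not os.getenv(name)]
if missing:
    raise RuntimeError(f"Missing environment variables: {missing}")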
app/__pycache__/main.cpython-313.pyc
CHANGED
Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ

app/core/__pycache__/config.cpython-313.pyc
CHANGED
Binary files a/app/core/__pycache__/config.cpython-313.pyc and b/app/core/__pycache__/config.cpython-313.pyc differ
app/core/config.py
CHANGED
@@ -22,7 +22,7 @@ class Settings(BaseSettings):
     BOOK_SOURCE_DIR: str | None = None
     INGESTION_CHUNK_SIZE: int | None = None
     INGESTION_OVERLAP: int | None = None
-
+    COHERE_EMBEDDING_MODEL: str = "embed-english-v3.0"
 
     class Config:
         env_file = BASE_DIR / ".env"
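With pydantic-settings, the new field keeps "embed-english-v3.0" as a class default but is overridden by the COHERE_EMBEDDING_MODEL entry in .env whenever one is present. A quick sketch of that behavior (assuming the Settings class above is importable as shown):

from app.core.config import settings

# Resolved from .env when set there, otherwise the class default applies;
# both happen to be "embed-english-v3.0" in this commit.
print(settings.COHERE_EMBEDDING_MODEL)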
app/ingestion/embedder.py
CHANGED
@@ -1,94 +1,204 @@

Removed (the original OpenAI-based implementation; the new file also keeps a commented-out copy of it at its top, omitted here to avoid duplication):

"""
Embedding generation module for the book ingestion pipeline.

This module provides functions to generate embeddings using OpenAI API.
"""

import os
import logging
import asyncio
import openai
from typing import List, Dict, Any, Union
from tenacity import retry, stop_after_attempt, wait_exponential
from openai import AsyncOpenAI
from app.core.config import settings


logger = logging.getLogger(__name__)

# Initialize OpenAI client with API key from environment
client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def generate_embedding(text: str) -> List[float]:
    """
    Generates an embedding vector for a text chunk.

    Args:
        text: Text to generate embedding for

    Returns:
        List of floats representing the embedding vector (1536 dimensions)

    Error handling: Raises exception if API call fails, includes retry logic
    """
    try:
        # Use the embedding model specified in environment or default
        model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")

        response = await client.embeddings.create(
            input=text,
            model=model
        )

        embedding = response.data[0].embedding
        logger.info(f"Generated embedding of size {len(embedding)} for text of length {len(text)}")
        return embedding

    except openai.APIError as e:
        logger.error(f"OpenAI API error when generating embedding: {e}")
        raise
    except Exception as e:
        logger.error(f"Error generating embedding: {e}")
        raise


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def batch_generate_embeddings(texts: List[str]) -> List[List[float]]:
    """
    Generates embeddings for multiple texts in a batch.

    Args:
        texts: List of texts to generate embeddings for

    Returns:
        List of embedding vectors (each a list of floats)

    Error handling: Raises exception if API call fails, includes retry logic
    """
    if not texts:
        return []

    try:
        # Use the embedding model specified in environment or default
        model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")

        # Note: OpenAI has a limit on batch sizes, typically up to 2048 texts per request
        # For simplicity, we'll handle all texts in one call, but in production
        # you'd want to chunk the requests based on API limits
        response = await client.embeddings.create(
            input=texts,
            model=model
        )

        embeddings = [item.embedding for item in response.data]
        logger.info(f"Generated {len(embeddings)} embeddings in batch")
        return embeddings

    except openai.APIError as e:
        logger.error(f"OpenAI API error when generating batch embeddings: {e}")
        raise
    except Exception as e:
        logger.error(f"Error generating batch embeddings: {e}")
        raise

Added (the active OpenRouter implementation, which follows the commented-out block in the new file):

"""
Embedding generation module for the book ingestion pipeline.

This module provides functions to generate embeddings using OpenRouter (OpenAI-compatible API).
"""

import os
import logging
from typing import List
from tenacity import retry, stop_after_attempt, wait_exponential
from openai import AsyncOpenAI


logger = logging.getLogger(__name__)

# OpenRouter client (it supports embeddings as well)
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),      # your OpenRouter key
    base_url="https://openrouter.ai/api/v1"   # required for embeddings too
)


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def generate_embedding(text: str) -> List[float]:
    if not text.strip():
        logger.warning("Empty text provided for embedding")
        return [0.0] * 1536  # OpenAI models are mostly 1536-dimensional

    try:
        # Embedding models available on OpenRouter
        model = os.getenv("OPENAI_EMBEDDING_MODEL", "openai/text-embedding-3-small")
        # Recommended:
        # "openai/text-embedding-3-small"  # cheap & good
        # "openai/text-embedding-3-large"  # best quality
        # "qwen/qwen3-embedding-8b"        # multilingual & powerful
        # "mistralai/mistral-embed-2312"   # good alternative
        # "google/gemini-embedding-001"    # Google's offering

        response = await client.embeddings.create(
            input=text,
            model=model
        )

        embedding = response.data[0].embedding
        logger.info(f"Generated OpenRouter embedding ({model}) | dim: {len(embedding)} | text len: {len(text)}")
        return embedding

    except Exception as e:
        logger.error(f"Error generating embedding (OpenRouter): {e}")
        raise


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def batch_generate_embeddings(texts: List[str]) -> List[List[float]]:
    if not texts:
        return []

    valid_texts = [t for t in texts if t.strip()]
    if not valid_texts:
        return [[0.0] * 1536] * len(texts)

    try:
        model = os.getenv("OPENAI_EMBEDDING_MODEL", "openai/text-embedding-3-small")

        response = await client.embeddings.create(
            input=valid_texts,
            model=model
        )

        embeddings = [item.embedding for item in response.data]
        logger.info(f"Generated {len(embeddings)} OpenRouter embeddings in batch ({model})")

        # Rebuild full list with zero vectors for empty texts
        result = []
        embed_idx = 0
        for text in texts:
            if text.strip():
                result.append(embeddings[embed_idx])
                embed_idx += 1
            else:
                result.append([0.0] * 1536)
        return result

    except Exception as e:
        logger.error(f"Error generating batch embeddings (OpenRouter): {e}")
        raise
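A minimal driver for the rewritten module's two entry points (a sketch, assuming the package is importable and the .env above has been loaded; openai/text-embedding-3-small returns 1536-dimensional vectors):

import asyncio
from app.ingestion.embedder import generate_embedding, batch_generate_embeddings

async def main():
    # One chunk -> one vector (1536 floats with the default model).
    vec = await generate_embedding("What is retrieval augmented generation?")
    print(len(vec))

    # Batch call: empty strings come back as zero vectors, order preserved.
    vecs = await batch_generate_embeddings(["first chunk", "", "second chunk"])
    print([len(v) for v in vecs])

asyncio.run(main())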
app/main.py
CHANGED
@@ -20,7 +20,9 @@ app = FastAPI(
 # Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
-    allow_origins=["*"],
+    allow_origins=["*",
+                   "http://localhost:3000",
+                   "https://hacathoon1-deploy.vercel.app/"],  # In production, change this to your specific frontend URL
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
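Two details about this origin list are worth knowing: browsers send the Origin header without a trailing slash, so the "https://hacathoon1-deploy.vercel.app/" entry never matches on its own, and the "*" wildcard is what actually admits every origin here. A tightened production configuration would drop both the wildcard and the slash (a sketch, not what this commit ships):

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",
        "https://hacathoon1-deploy.vercel.app",  # no trailing slash: must equal the Origin header exactly
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)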
app/rag/__pycache__/generator.cpython-313.pyc
CHANGED
Binary files a/app/rag/__pycache__/generator.cpython-313.pyc and b/app/rag/__pycache__/generator.cpython-313.pyc differ

app/rag/__pycache__/retriever.cpython-313.pyc
CHANGED
Binary files a/app/rag/__pycache__/retriever.cpython-313.pyc and b/app/rag/__pycache__/retriever.cpython-313.pyc differ
app/rag/generator.py
CHANGED
@@ -1,41 +1,185 @@

Removed (the original OpenAI-based implementation; the new file also keeps a commented-out copy of it at its top, omitted here to avoid duplication):

"""
Answer generation module for the RAG (Retrieval Augmented Generation) system.

This module provides async methods to generate answers using OpenAI's
Chat Completion API based on the provided context and user question.
"""
from typing import Dict, Any, Optional
import openai
import logging
from openai import AsyncOpenAI
from dotenv import load_dotenv
import os
from app.core.config import settings


logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

# Initialize the OpenAI client
client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Use the model specified in the settings, defaulting to gpt-3.5-turbo
OPENAI_MODEL = getattr(settings, "OPENAI_MODEL", "gpt-3.5-turbo")


class AnswerGenerator:
    """
    Generates answers using OpenAI's Chat Completion API based on context.
    """

    def __init__(self, temperature: float = 0.3):
        """
        Initialize the AnswerGenerator with a specific temperature.

        Args:
            temperature: Controls randomness in generation (0.0-1.0, lower means less random)
        """
        if temperature > 0.3:
            logger.warning(f"Temperature {temperature} is higher than recommended maximum of 0.3 for RAG application")
        self.temperature = temperature

    async def generate_answer(
        self,
        system_message: str,
        user_message: str,
        max_tokens: int = 1000
    ) -> Optional[Dict[str, Any]]:
        """
        Generate an answer using OpenAI Chat Completion API.

        Args:
            system_message: The system message providing context and instructions
            user_message: The user message containing the question and context
            max_tokens: Maximum number of tokens to generate (default: 1000)

        Returns:
            Dictionary containing the response or None if generation failed
        """
        try:
            response = await client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message}
                ],
                temperature=self.temperature,
                max_tokens=max_tokens,
                timeout=30  # 30 second timeout
            )

            # Extract the answer from the response
            answer = response.choices[0].message.content
            usage = {
                "prompt_tokens": response.usage.prompt_tokens if response.usage else 0,
                "completion_tokens": response.usage.completion_tokens if response.usage else 0,
                "total_tokens": response.usage.total_tokens if response.usage else 0
            }

            result = {
                "answer": answer,
                "usage": usage,
                "model": response.model
            }

            logger.info(f"Successfully generated answer with {usage['total_tokens']} total tokens used")
            return result

        except openai.APIError as e:
            logger.error(f"OpenAI API error during answer generation: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error during answer generation: {e}")
            return None

    async def generate_answer_simple(self, prompt: str) -> Optional[str]:
        """
        Generate an answer using a simple prompt format.

        Args:
            prompt: Complete prompt string including system instructions and user question

        Returns:
            Generated answer text or None if generation failed
        """
        try:
            response = await client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=[
                    {"role": "user", "content": prompt}
                ],
                temperature=self.temperature,
                timeout=30  # 30 second timeout
            )

            answer = response.choices[0].message.content
            logger.info(f"Successfully generated answer with model {response.model}")
            return answer

        except openai.APIError as e:
            logger.error(f"OpenAI API error during simple answer generation: {e}")
            return None
        except Exception as e:
            logger.error(f"Unexpected error during simple answer generation: {e}")
            return None

Added (the active OpenRouter implementation, which follows the commented-out block in the new file):

"""
Answer generation module for the RAG (Retrieval Augmented Generation) system.

This module provides async methods to generate answers using OpenRouter
(via OpenAI-compatible API) based on the provided context and user question.
"""

from typing import Dict, Any, Optional
import logging
import os
from openai import AsyncOpenAI  # The OpenAI client works with OpenRouter too!


logger = logging.getLogger(__name__)

# Initialize the OpenAI client with the OpenRouter base URL
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),      # this will be your OpenRouter key
    base_url="https://openrouter.ai/api/v1"   # <-- this line is the most important one!
)

# Use a powerful & cheap model available on OpenRouter
# Recommended options (as of Dec 2025):
OPENAI_MODEL = os.getenv(
    "OPENAI_MODEL",
    "meta-llama/llama-3.1-70b-instruct"  # best balance: smart + cheap
    # Alternatives:
    # "meta-llama/llama-3.1-8b-instruct:free"  # completely free
    # "google/gemini-flash-1.5"                # fast & reliable
    # "anthropic/claude-3.5-sonnet"            # top quality (a bit costly)
)


class AnswerGenerator:
    """
    Generates answers using OpenRouter (OpenAI-compatible) API based on context.
    """

    def __init__(self, temperature: float = 0.3):
        """
        Initialize the AnswerGenerator with a specific temperature.
        """
        if temperature > 0.3:
            logger.warning(f"Temperature {temperature} is higher than recommended maximum of 0.3 for RAG application")
        self.temperature = temperature

    async def generate_answer(
        self,
        system_message: str,
        user_message: str,
        max_tokens: int = 1000
    ) -> Optional[Dict[str, Any]]:
        """
        Generate an answer using OpenRouter Chat Completion API.
        """
        try:
            response = await client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=[
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message}
                ],
                temperature=self.temperature,
                max_tokens=max_tokens,
                timeout=60  # OpenRouter can be a bit slow, so the timeout is raised
            )

            answer = response.choices[0].message.content.strip()
            usage = {
                "prompt_tokens": response.usage.prompt_tokens if response.usage else 0,
                "completion_tokens": response.usage.completion_tokens if response.usage else 0,
                "total_tokens": response.usage.total_tokens if response.usage else 0
            }

            result = {
                "answer": answer,
                "usage": usage,
                "model": response.model
            }

            logger.info(f"Successfully generated answer using {response.model} | Tokens: {usage['total_tokens']}")
            return result

        except Exception as e:  # broad catch, since OpenRouter can return differently shaped errors
            logger.error(f"Error during answer generation (OpenRouter): {e}")
            return None

    async def generate_answer_simple(self, prompt: str) -> Optional[str]:
        """
        Generate an answer using a simple prompt format.
        """
        try:
            response = await client.chat.completions.create(
                model=OPENAI_MODEL,
                messages=[
                    {"role": "user", "content": prompt}
                ],
                temperature=self.temperature,
                timeout=60
            )

            answer = response.choices[0].message.content.strip()
            logger.info(f"Simple answer generated with {response.model}")
            return answer

        except Exception as e:
            logger.error(f"Error during simple answer generation (OpenRouter): {e}")
            return None
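A minimal driver for the rewritten AnswerGenerator (a sketch, assuming the package imports resolve and OPENAI_API_KEY carries the OpenRouter key as configured above):

import asyncio
from app.rag.generator import AnswerGenerator

async def main():
    generator = AnswerGenerator(temperature=0.2)
    result = await generator.generate_answer(
        system_message="Answer only from the provided context.",
        user_message="Context: ...\n\nQuestion: What does the embedder module do?",
        max_tokens=300,
    )
    # None signals a failed generation; errors are logged rather than raised.
    if result is not None:
        print(result["answer"])
        print("tokens used:", result["usage"]["total_tokens"])

asyncio.run(main())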
app/rag/retriever.py
CHANGED
@@ -81,4 +81,30 @@ class VectorRetriever:
             return results
         except Exception as e:
             logger.error(f"Error retrieving vectors by ID: {e}")
-            return []
+            return []
+
+# ==========================================================
+

(The remaining added lines, 85-108 of the new file, are blank; the trailing "# ==========" divider comment and a final blank line close the file.)
app/services/__pycache__/agent_service.cpython-313.pyc
CHANGED
Binary files a/app/services/__pycache__/agent_service.cpython-313.pyc and b/app/services/__pycache__/agent_service.cpython-313.pyc differ
app/services/agent_service.py
CHANGED
@@ -7,8 +7,11 @@ import os
 
 
 logger = logging.getLogger(__name__)
-client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+
+client = OpenAI(
+    api_key=os.getenv("OPENAI_API_KEY"),      # this stays the OpenRouter key
+    base_url="https://openrouter.ai/api/v1"   # <-- add this line (the most important part!)
+)
 
 class AgentService:
     """Service class for the agent that intelligently routes queries based on context."""

@@ -83,7 +86,7 @@ Answer clearly and helpfully.
 
         try:
             response = client.chat.completions.create(
-                model="
+                model="mistralai/devstral-2512:free",  #
                 messages=[
                     {"role": "system", "content": "You are a helpful course assistant. Be accurate and friendly."},
                     {"role": "user", "content": prompt}

@@ -157,7 +160,7 @@ Answer clearly and helpfully.
 
         try:
             response = client.chat.completions.create(
-                model="
+                model="mistralai/devstral-2512:free",
                 messages=[
                     {"role": "system", "content": "You are a friendly and helpful AI assistant."},
                     {"role": "user", "content": request.question}
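The pinned model uses OpenRouter's ":free" suffix, which selects the rate-limited free variant of a model. A one-off connectivity check with the same synchronous client setup (a sketch; the model ID "mistralai/devstral-2512:free" is taken from this commit, not verified against OpenRouter's current catalog):

import os
from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),      # the OpenRouter key
    base_url="https://openrouter.ai/api/v1",  # same redirection as agent_service.py
)

reply = client.chat.completions.create(
    model="mistralai/devstral-2512:free",
    messages=[{"role": "user", "content": "ping"}],
)
print(reply.choices[0].message.content)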
app/utils/__pycache__/embeddings.cpython-313.pyc
CHANGED
Binary files a/app/utils/__pycache__/embeddings.cpython-313.pyc and b/app/utils/__pycache__/embeddings.cpython-313.pyc differ
app/utils/embeddings.py
CHANGED
@@ -14,13 +14,10 @@ from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
 
-
-
-
-
-EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
-
-
+client = AsyncOpenAI(
+    api_key=os.getenv("OPENAI_API_KEY"),      # your OpenRouter key
+    base_url="https://openrouter.ai/api/v1"   # required for embeddings too
+)
 async def get_embeddings(texts: Union[str, List[str]]) -> List[List[float]]:
     """
     Convert text(s) to embeddings using the OpenAI embedding model.

@@ -38,7 +35,7 @@ async def get_embeddings(texts: Union[str, List[str]]) -> List[List[float]]:
     # Create embeddings using OpenAI API
     response = await client.embeddings.create(
         input=texts,
-        model=EMBEDDING_MODEL
+        model=os.getenv("OPENAI_EMBEDDING_MODEL", "openai/text-embedding-3-small")  # use the model from the env variable
     )
 
     # Extract and return the embeddings
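One property makes this swap painless: text-embedding-ada-002 and openai/text-embedding-3-small both produce 1536-dimensional vectors, so the existing Qdrant collection keeps its configured vector size. The two models still embed into different spaces, though, so previously ingested vectors should be regenerated. A quick compatibility check (a sketch; QDRANT_URL is assumed to sit in the untruncated part of .env alongside QDRANT_API_KEY):

import os
from qdrant_client import QdrantClient

client = QdrantClient(url=os.getenv("QDRANT_URL"), api_key=os.getenv("QDRANT_API_KEY"))
info = client.get_collection(os.getenv("QDRANT_COLLECTION_NAME", "test-clustor"))

# The collection's vector size should equal the 1536 dimensions
# produced by openai/text-embedding-3-small.
print(info.config.params.vectors)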
requirements.txt
CHANGED
@@ -6,4 +6,6 @@ pydantic-settings
 asyncpg
 qdrant-client
 openai
-python-markdown
+python-markdown
+langchain-cohere
+cohere