suhail commited on
Commit
b2f2d4d
·
1 Parent(s): c4a0718

final: switch to OpenRouter for chat & embeddings (low cost + full RAG working)

Browse files
.env CHANGED
@@ -14,15 +14,27 @@ QDRANT_API_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhY2Nlc3MiOiJtIn0.K6s2NFJR
14
 
15
  QDRANT_COLLECTION_NAME=test-clustor
16
 
17
- OPENAI_API_KEY=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
 
 
 
18
 
19
  BOOK_SOURCE_DIR=../website/docs/modules
20
  INGESTION_CHUNK_SIZE=400
21
  INGESTION_OVERLAP=50
22
- OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
23
 
24
 
25
  # .env
26
- VITE_API_URL=http://127.0.0.1:8000/agent/query
27
  # Add other vars if needed
28
- VITE_APP_NAME=My Book Agent
 
 
 
 
 
 
 
 
 
 
14
 
15
  QDRANT_COLLECTION_NAME=test-clustor
16
 
17
+
18
+ # SECURITY: a live OpenRouter key was committed on this line — revoke/rotate
+ # it immediately and inject it via deployment secrets, never the repo.
+ OPENAI_API_KEY=<your-openrouter-api-key>
19
+
20
+ # NOTE: the previous OpenAI project key was committed here in plaintext;
+ # it must be revoked on the OpenAI dashboard even though it is now unused.
21
 
22
  BOOK_SOURCE_DIR=../website/docs/modules
23
  INGESTION_CHUNK_SIZE=400
24
  INGESTION_OVERLAP=50
25
+ # OPENAI_EMBEDDING_MODEL=text-embedding-ada-002
26
 
27
 
28
  # .env
29
+ VITE_API_URL=https://sk3078-rag-chatbot.hf.space/agent/query
30
  # Add other vars if needed
31
+ VITE_APP_NAME=My Book Agent
32
+
33
+
34
+ OPEN_ROUTER_API_KEY=<your-openrouter-api-key>  # do not commit real keys — rotate the leaked one
35
+
36
+
37
+
38
+ COHERE_API_KEY=<your-cohere-api-key>  # do not commit real keys — rotate the leaked one
39
+
40
+ COHERE_EMBEDDING_MODEL=embed-english-v3.0
app/__pycache__/main.cpython-313.pyc CHANGED
Binary files a/app/__pycache__/main.cpython-313.pyc and b/app/__pycache__/main.cpython-313.pyc differ
 
app/core/__pycache__/config.cpython-313.pyc CHANGED
Binary files a/app/core/__pycache__/config.cpython-313.pyc and b/app/core/__pycache__/config.cpython-313.pyc differ
 
app/core/config.py CHANGED
@@ -22,7 +22,7 @@ class Settings(BaseSettings):
22
  BOOK_SOURCE_DIR: str | None = None
23
  INGESTION_CHUNK_SIZE: int | None = None
24
  INGESTION_OVERLAP: int | None = None
25
- OPENAI_EMBEDDING_MODEL: str ="gpt-3.5-turbo"
26
 
27
  class Config:
28
  env_file = BASE_DIR / ".env"
 
22
  BOOK_SOURCE_DIR: str | None = None
23
  INGESTION_CHUNK_SIZE: int | None = None
24
  INGESTION_OVERLAP: int | None = None
25
+ COHERE_EMBEDDING_MODEL: str ="embed-english-v3.0"
26
 
27
  class Config:
28
  env_file = BASE_DIR / ".env"
app/ingestion/embedder.py CHANGED
@@ -1,94 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Embedding generation module for the book ingestion pipeline.
3
 
4
- This module provides functions to generate embeddings using OpenAI API.
5
  """
6
 
7
  import os
8
  import logging
9
- import asyncio
10
- import openai
11
- from typing import List, Dict, Any, Union
12
  from tenacity import retry, stop_after_attempt, wait_exponential
13
  from openai import AsyncOpenAI
14
- from app.core.config import settings
15
 
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
- # Initialize OpenAI client with API key from environment
20
- client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
 
 
 
21
 
22
 
23
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
24
  async def generate_embedding(text: str) -> List[float]:
25
- """
26
- Generates an embedding vector for a text chunk.
27
-
28
- Args:
29
- text: Text to generate embedding for
30
-
31
- Returns:
32
- List of floats representing the embedding vector (1536 dimensions)
33
 
34
- Error handling: Raises exception if API call fails, includes retry logic
35
- """
36
  try:
37
- # Use the embedding model specified in environment or default
38
- model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
39
-
 
 
 
 
 
 
40
  response = await client.embeddings.create(
41
  input=text,
42
  model=model
43
  )
44
-
45
  embedding = response.data[0].embedding
46
- logger.info(f"Generated embedding of size {len(embedding)} for text of length {len(text)}")
47
  return embedding
48
-
49
- except openai.APIError as e:
50
- logger.error(f"OpenAI API error when generating embedding: {e}")
51
- raise
52
  except Exception as e:
53
- logger.error(f"Error generating embedding: {e}")
54
  raise
55
 
56
 
57
  @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
58
  async def batch_generate_embeddings(texts: List[str]) -> List[List[float]]:
59
- """
60
- Generates embeddings for multiple texts in a batch.
61
-
62
- Args:
63
- texts: List of texts to generate embeddings for
64
-
65
- Returns:
66
- List of embedding vectors (each a list of floats)
67
-
68
- Error handling: Raises exception if API call fails, includes retry logic
69
- """
70
  if not texts:
71
  return []
72
-
 
 
 
 
73
  try:
74
- # Use the embedding model specified in environment or default
75
- model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
76
-
77
- # Note: OpenAI has a limit on batch sizes, typically up to 2048 texts per request
78
- # For simplicity, we'll handle all texts in one call, but in production
79
- # you'd want to chunk the requests based on API limits
80
  response = await client.embeddings.create(
81
- input=texts,
82
  model=model
83
  )
84
-
85
  embeddings = [item.embedding for item in response.data]
86
- logger.info(f"Generated {len(embeddings)} embeddings in batch")
87
- return embeddings
88
-
89
- except openai.APIError as e:
90
- logger.error(f"OpenAI API error when generating batch embeddings: {e}")
91
- raise
 
 
 
 
 
 
 
92
  except Exception as e:
93
- logger.error(f"Error generating batch embeddings: {e}")
94
  raise
 
1
+ # """
2
+ # Embedding generation module for the book ingestion pipeline.
3
+
4
+ # This module provides functions to generate embeddings using OpenAI API.
5
+ # """
6
+
7
+ # import os
8
+ # import logging
9
+ # import asyncio
10
+ # import openai
11
+ # from typing import List, Dict, Any, Union
12
+ # from tenacity import retry, stop_after_attempt, wait_exponential
13
+ # from openai import AsyncOpenAI
14
+ # from app.core.config import settings
15
+
16
+
17
+ # logger = logging.getLogger(__name__)
18
+
19
+ # # Initialize OpenAI client with API key from environment
20
+ # client = AsyncOpenAI(api_key=settings.OPENAI_API_KEY)
21
+
22
+
23
+ # @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
24
+ # async def generate_embedding(text: str) -> List[float]:
25
+ # """
26
+ # Generates an embedding vector for a text chunk.
27
+
28
+ # Args:
29
+ # text: Text to generate embedding for
30
+
31
+ # Returns:
32
+ # List of floats representing the embedding vector (1536 dimensions)
33
+
34
+ # Error handling: Raises exception if API call fails, includes retry logic
35
+ # """
36
+ # try:
37
+ # # Use the embedding model specified in environment or default
38
+ # model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
39
+
40
+ # response = await client.embeddings.create(
41
+ # input=text,
42
+ # model=model
43
+ # )
44
+
45
+ # embedding = response.data[0].embedding
46
+ # logger.info(f"Generated embedding of size {len(embedding)} for text of length {len(text)}")
47
+ # return embedding
48
+
49
+ # except openai.APIError as e:
50
+ # logger.error(f"OpenAI API error when generating embedding: {e}")
51
+ # raise
52
+ # except Exception as e:
53
+ # logger.error(f"Error generating embedding: {e}")
54
+ # raise
55
+
56
+
57
+ # @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
58
+ # async def batch_generate_embeddings(texts: List[str]) -> List[List[float]]:
59
+ # """
60
+ # Generates embeddings for multiple texts in a batch.
61
+
62
+ # Args:
63
+ # texts: List of texts to generate embeddings for
64
+
65
+ # Returns:
66
+ # List of embedding vectors (each a list of floats)
67
+
68
+ # Error handling: Raises exception if API call fails, includes retry logic
69
+ # """
70
+ # if not texts:
71
+ # return []
72
+
73
+ # try:
74
+ # # Use the embedding model specified in environment or default
75
+ # model = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
76
+
77
+ # # Note: OpenAI has a limit on batch sizes, typically up to 2048 texts per request
78
+ # # For simplicity, we'll handle all texts in one call, but in production
79
+ # # you'd want to chunk the requests based on API limits
80
+ # response = await client.embeddings.create(
81
+ # input=texts,
82
+ # model=model
83
+ # )
84
+
85
+ # embeddings = [item.embedding for item in response.data]
86
+ # logger.info(f"Generated {len(embeddings)} embeddings in batch")
87
+ # return embeddings
88
+
89
+ # except openai.APIError as e:
90
+ # logger.error(f"OpenAI API error when generating batch embeddings: {e}")
91
+ # raise
92
+ # except Exception as e:
93
+ # logger.error(f"Error generating batch embeddings: {e}")
94
+ # raise
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
  """
120
  Embedding generation module for the book ingestion pipeline.
121
 
122
+ This module provides functions to generate embeddings using OpenRouter (OpenAI-compatible API).
123
  """
124
 
125
  import os
126
  import logging
127
+ from typing import List
 
 
128
  from tenacity import retry, stop_after_attempt, wait_exponential
129
  from openai import AsyncOpenAI
 
130
 
131
 
132
  logger = logging.getLogger(__name__)
133
 
134
+ # OpenRouter client (embeddings bhi support karta hai)
135
+ client = AsyncOpenAI(
136
+ api_key=os.getenv("OPENAI_API_KEY"), # Tumhara OpenRouter key
137
+ base_url="https://openrouter.ai/api/v1" # Yeh zaroori hai embeddings ke liye bhi
138
+ )
139
 
140
 
141
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def generate_embedding(text: str) -> List[float]:
    """
    Generate an embedding vector for a single text chunk via OpenRouter.

    Args:
        text: Text to embed. Blank/whitespace-only input short-circuits to a
            zero vector instead of wasting an API call.

    Returns:
        The embedding vector. For blank input, a zero vector whose length is
        taken from the EMBEDDING_DIM env var (default 1536, matching
        openai/text-embedding-3-small).

    Raises:
        Exception: Propagated after logging when the API call fails; tenacity
            retries up to 3 times with exponential backoff first.
    """
    if not text.strip():
        logger.warning("Empty text provided for embedding")
        # BUG FIX: the zero-vector length was hard-coded to 1536, which is
        # wrong for models such as openai/text-embedding-3-large (3072 dims).
        # Make it configurable; 1536 stays the default for compatibility.
        return [0.0] * int(os.getenv("EMBEDDING_DIM", "1536"))

    try:
        # Embedding model served by OpenRouter; override via env. Options:
        #   openai/text-embedding-3-small   (cheap, good default)
        #   openai/text-embedding-3-large   (best quality, 3072 dims)
        #   qwen/qwen3-embedding-8b         (multilingual)
        #   mistralai/mistral-embed-2312    (alternative)
        #   google/gemini-embedding-001     (Google)
        model = os.getenv("OPENAI_EMBEDDING_MODEL", "openai/text-embedding-3-small")

        response = await client.embeddings.create(
            input=text,
            model=model,
        )

        embedding = response.data[0].embedding
        # Lazy %-style args avoid building the message when INFO is disabled.
        logger.info(
            "Generated OpenRouter embedding (%s) | dim: %d | text len: %d",
            model, len(embedding), len(text),
        )
        return embedding

    except Exception as e:
        logger.error("Error generating embedding (OpenRouter): %s", e)
        raise
169
 
170
 
171
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def batch_generate_embeddings(texts: List[str]) -> List[List[float]]:
    """
    Generate embeddings for multiple texts in one OpenRouter API call.

    Blank texts are not sent to the API; their slots are filled with zero
    vectors so the result aligns index-for-index with ``texts``.

    Args:
        texts: Texts to embed (may contain blank entries).

    Returns:
        One embedding vector per input text, in the same order. Zero vectors
        (length EMBEDDING_DIM, default 1536) stand in for blank texts.

    Raises:
        Exception: Propagated after logging when the API call fails; tenacity
            retries up to 3 times with exponential backoff first.
    """
    if not texts:
        return []

    dim = int(os.getenv("EMBEDDING_DIM", "1536"))

    valid_texts = [t for t in texts if t.strip()]
    if not valid_texts:
        # BUG FIX: the original returned [[0.0] * 1536] * len(texts), which
        # makes every row the SAME list object — mutating one row would
        # silently mutate all of them. Build a fresh list per row instead.
        return [[0.0] * dim for _ in texts]

    try:
        model = os.getenv("OPENAI_EMBEDDING_MODEL", "openai/text-embedding-3-small")

        response = await client.embeddings.create(
            input=valid_texts,
            model=model,
        )

        embeddings = [item.embedding for item in response.data]
        logger.info(
            "Generated %d OpenRouter embeddings in batch (%s)",
            len(embeddings), model,
        )

        # Re-align with the caller's list: real embeddings for non-blank
        # texts, fresh zero vectors for blank ones.
        result: List[List[float]] = []
        embed_idx = 0
        for text in texts:
            if text.strip():
                result.append(embeddings[embed_idx])
                embed_idx += 1
            else:
                result.append([0.0] * dim)
        return result

    except Exception as e:
        logger.error("Error generating batch embeddings (OpenRouter): %s", e)
        raise
app/main.py CHANGED
@@ -20,7 +20,9 @@ app = FastAPI(
20
  # Add CORS middleware
21
  app.add_middleware(
22
  CORSMiddleware,
23
- allow_origins=["*"], # In production, change this to your specific frontend URL
 
 
24
  allow_credentials=True,
25
  allow_methods=["*"],
26
  allow_headers=["*"],
 
20
  # Add CORS middleware
21
  app.add_middleware(
22
  CORSMiddleware,
23
+ allow_origins=["*",
24
+ "http://localhost:3000",
25
+ "https://hacathoon1-deploy.vercel.app/"], # In production, change this to your specific frontend URL
26
  allow_credentials=True,
27
  allow_methods=["*"],
28
  allow_headers=["*"],
app/rag/__pycache__/generator.cpython-313.pyc CHANGED
Binary files a/app/rag/__pycache__/generator.cpython-313.pyc and b/app/rag/__pycache__/generator.cpython-313.pyc differ
 
app/rag/__pycache__/retriever.cpython-313.pyc CHANGED
Binary files a/app/rag/__pycache__/retriever.cpython-313.pyc and b/app/rag/__pycache__/retriever.cpython-313.pyc differ
 
app/rag/generator.py CHANGED
@@ -1,41 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Answer generation module for the RAG (Retrieval Augmented Generation) system.
3
 
4
- This module provides async methods to generate answers using OpenAI's
5
- Chat Completion API based on the provided context and user question.
6
  """
 
7
  from typing import Dict, Any, Optional
8
- import openai
9
  import logging
10
- from openai import AsyncOpenAI
11
- from dotenv import load_dotenv
12
  import os
13
- from app.core.config import settings
14
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
18
- # Load environment variables
19
- load_dotenv()
20
-
21
- # Initialize the OpenAI client
22
- client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
23
 
24
- # Use the model specified in the settings, defaulting to gpt-3.5-turbo
25
- OPENAI_MODEL = getattr(settings, "OPENAI_MODEL", "gpt-3.5-turbo")
 
 
 
 
 
 
 
 
26
 
27
 
28
  class AnswerGenerator:
29
  """
30
- Generates answers using OpenAI's Chat Completion API based on context.
31
  """
32
 
33
  def __init__(self, temperature: float = 0.3):
34
  """
35
  Initialize the AnswerGenerator with a specific temperature.
36
-
37
- Args:
38
- temperature: Controls randomness in generation (0.0-1.0, lower means less random)
39
  """
40
  if temperature > 0.3:
41
  logger.warning(f"Temperature {temperature} is higher than recommended maximum of 0.3 for RAG application")
@@ -48,15 +192,7 @@ class AnswerGenerator:
48
  max_tokens: int = 1000
49
  ) -> Optional[Dict[str, Any]]:
50
  """
51
- Generate an answer using OpenAI Chat Completion API.
52
-
53
- Args:
54
- system_message: The system message providing context and instructions
55
- user_message: The user message containing the question and context
56
- max_tokens: Maximum number of tokens to generate (default: 1000)
57
-
58
- Returns:
59
- Dictionary containing the response or None if generation failed
60
  """
61
  try:
62
  response = await client.chat.completions.create(
@@ -67,11 +203,10 @@ class AnswerGenerator:
67
  ],
68
  temperature=self.temperature,
69
  max_tokens=max_tokens,
70
- timeout=30 # 30 second timeout
71
  )
72
 
73
- # Extract the answer from the response
74
- answer = response.choices[0].message.content
75
  usage = {
76
  "prompt_tokens": response.usage.prompt_tokens if response.usage else 0,
77
  "completion_tokens": response.usage.completion_tokens if response.usage else 0,
@@ -84,25 +219,16 @@ class AnswerGenerator:
84
  "model": response.model
85
  }
86
 
87
- logger.info(f"Successfully generated answer with {usage['total_tokens']} total tokens used")
88
  return result
89
 
90
- except openai.APIError as e:
91
- logger.error(f"OpenAI API error during answer generation: {e}")
92
- return None
93
- except Exception as e:
94
- logger.error(f"Unexpected error during answer generation: {e}")
95
  return None
96
 
97
  async def generate_answer_simple(self, prompt: str) -> Optional[str]:
98
  """
99
  Generate an answer using a simple prompt format.
100
-
101
- Args:
102
- prompt: Complete prompt string including system instructions and user question
103
-
104
- Returns:
105
- Generated answer text or None if generation failed
106
  """
107
  try:
108
  response = await client.chat.completions.create(
@@ -111,16 +237,13 @@ class AnswerGenerator:
111
  {"role": "user", "content": prompt}
112
  ],
113
  temperature=self.temperature,
114
- timeout=30 # 30 second timeout
115
  )
116
 
117
- answer = response.choices[0].message.content
118
- logger.info(f"Successfully generated answer with model {response.model}")
119
  return answer
120
 
121
- except openai.APIError as e:
122
- logger.error(f"OpenAI API error during simple answer generation: {e}")
123
- return None
124
  except Exception as e:
125
- logger.error(f"Unexpected error during simple answer generation: {e}")
126
  return None
 
1
+ # """
2
+ # Answer generation module for the RAG (Retrieval Augmented Generation) system.
3
+
4
+ # This module provides async methods to generate answers using OpenAI's
5
+ # Chat Completion API based on the provided context and user question.
6
+ # """
7
+ # from typing import Dict, Any, Optional
8
+ # import openai
9
+ # import logging
10
+ # from openai import AsyncOpenAI
11
+ # from dotenv import load_dotenv
12
+ # import os
13
+ # from app.core.config import settings
14
+
15
+
16
+ # logger = logging.getLogger(__name__)
17
+
18
+ # # Load environment variables
19
+ # load_dotenv()
20
+
21
+ # # Initialize the OpenAI client
22
+ # client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
23
+
24
+ # # Use the model specified in the settings, defaulting to gpt-3.5-turbo
25
+ # OPENAI_MODEL = getattr(settings, "OPENAI_MODEL", "gpt-3.5-turbo")
26
+
27
+
28
+ # class AnswerGenerator:
29
+ # """
30
+ # Generates answers using OpenAI's Chat Completion API based on context.
31
+ # """
32
+
33
+ # def __init__(self, temperature: float = 0.3):
34
+ # """
35
+ # Initialize the AnswerGenerator with a specific temperature.
36
+
37
+ # Args:
38
+ # temperature: Controls randomness in generation (0.0-1.0, lower means less random)
39
+ # """
40
+ # if temperature > 0.3:
41
+ # logger.warning(f"Temperature {temperature} is higher than recommended maximum of 0.3 for RAG application")
42
+ # self.temperature = temperature
43
+
44
+ # async def generate_answer(
45
+ # self,
46
+ # system_message: str,
47
+ # user_message: str,
48
+ # max_tokens: int = 1000
49
+ # ) -> Optional[Dict[str, Any]]:
50
+ # """
51
+ # Generate an answer using OpenAI Chat Completion API.
52
+
53
+ # Args:
54
+ # system_message: The system message providing context and instructions
55
+ # user_message: The user message containing the question and context
56
+ # max_tokens: Maximum number of tokens to generate (default: 1000)
57
+
58
+ # Returns:
59
+ # Dictionary containing the response or None if generation failed
60
+ # """
61
+ # try:
62
+ # response = await client.chat.completions.create(
63
+ # model=OPENAI_MODEL,
64
+ # messages=[
65
+ # {"role": "system", "content": system_message},
66
+ # {"role": "user", "content": user_message}
67
+ # ],
68
+ # temperature=self.temperature,
69
+ # max_tokens=max_tokens,
70
+ # timeout=30 # 30 second timeout
71
+ # )
72
+
73
+ # # Extract the answer from the response
74
+ # answer = response.choices[0].message.content
75
+ # usage = {
76
+ # "prompt_tokens": response.usage.prompt_tokens if response.usage else 0,
77
+ # "completion_tokens": response.usage.completion_tokens if response.usage else 0,
78
+ # "total_tokens": response.usage.total_tokens if response.usage else 0
79
+ # }
80
+
81
+ # result = {
82
+ # "answer": answer,
83
+ # "usage": usage,
84
+ # "model": response.model
85
+ # }
86
+
87
+ # logger.info(f"Successfully generated answer with {usage['total_tokens']} total tokens used")
88
+ # return result
89
+
90
+ # except openai.APIError as e:
91
+ # logger.error(f"OpenAI API error during answer generation: {e}")
92
+ # return None
93
+ # except Exception as e:
94
+ # logger.error(f"Unexpected error during answer generation: {e}")
95
+ # return None
96
+
97
+ # async def generate_answer_simple(self, prompt: str) -> Optional[str]:
98
+ # """
99
+ # Generate an answer using a simple prompt format.
100
+
101
+ # Args:
102
+ # prompt: Complete prompt string including system instructions and user question
103
+
104
+ # Returns:
105
+ # Generated answer text or None if generation failed
106
+ # """
107
+ # try:
108
+ # response = await client.chat.completions.create(
109
+ # model=OPENAI_MODEL,
110
+ # messages=[
111
+ # {"role": "user", "content": prompt}
112
+ # ],
113
+ # temperature=self.temperature,
114
+ # timeout=30 # 30 second timeout
115
+ # )
116
+
117
+ # answer = response.choices[0].message.content
118
+ # logger.info(f"Successfully generated answer with model {response.model}")
119
+ # return answer
120
+
121
+ # except openai.APIError as e:
122
+ # logger.error(f"OpenAI API error during simple answer generation: {e}")
123
+ # return None
124
+ # except Exception as e:
125
+ # logger.error(f"Unexpected error during simple answer generation: {e}")
126
+ # return None
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
  """
143
  Answer generation module for the RAG (Retrieval Augmented Generation) system.
144
 
145
+ This module provides async methods to generate answers using OpenRouter
146
+ (via OpenAI-compatible API) based on the provided context and user question.
147
  """
148
+
149
  from typing import Dict, Any, Optional
 
150
  import logging
 
 
151
  import os
152
+ from openai import AsyncOpenAI # OpenAI client works with OpenRouter too!
153
 
154
 
155
  logger = logging.getLogger(__name__)
156
 
157
+ # Initialize the OpenAI client with OpenRouter base URL
158
+ client = AsyncOpenAI(
159
+ api_key=os.getenv("OPENAI_API_KEY"), # Yeh tumhara OpenRouter key hoga
160
+ base_url="https://openrouter.ai/api/v1" # <-- Yeh line sabse zaroori hai!
161
+ )
162
 
163
+ # Model ko OpenRouter pe available powerful & sasta model use karo
164
+ # Recommended options (2025 Dec ke hisaab se):
165
+ OPENAI_MODEL = os.getenv(
166
+ "OPENAI_MODEL",
167
+ "meta-llama/llama-3.1-70b-instruct" # Best balance: smart + sasta
168
+ # Alternatives:
169
+ # "meta-llama/llama-3.1-8b-instruct:free" # Completely free
170
+ # "google/gemini-flash-1.5" # Fast & reliable
171
+ # "anthropic/claude-3.5-sonnet" # Top quality (thoda costly)
172
+ )
173
 
174
 
175
  class AnswerGenerator:
176
  """
177
+ Generates answers using OpenRouter (OpenAI-compatible) API based on context.
178
  """
179
 
180
  def __init__(self, temperature: float = 0.3):
181
  """
182
  Initialize the AnswerGenerator with a specific temperature.
 
 
 
183
  """
184
  if temperature > 0.3:
185
  logger.warning(f"Temperature {temperature} is higher than recommended maximum of 0.3 for RAG application")
 
192
  max_tokens: int = 1000
193
  ) -> Optional[Dict[str, Any]]:
194
  """
195
+ Generate an answer using OpenRouter Chat Completion API.
 
 
 
 
 
 
 
 
196
  """
197
  try:
198
  response = await client.chat.completions.create(
 
203
  ],
204
  temperature=self.temperature,
205
  max_tokens=max_tokens,
206
+ timeout=60 # OpenRouter thoda slow ho sakta hai, timeout badha diya
207
  )
208
 
209
+ answer = response.choices[0].message.content.strip()
 
210
  usage = {
211
  "prompt_tokens": response.usage.prompt_tokens if response.usage else 0,
212
  "completion_tokens": response.usage.completion_tokens if response.usage else 0,
 
219
  "model": response.model
220
  }
221
 
222
+ logger.info(f"Successfully generated answer using {response.model} | Tokens: {usage['total_tokens']}")
223
  return result
224
 
225
+ except Exception as e: # Broad catch kyuki OpenRouter se alag error format aa sakte hain
226
+ logger.error(f"Error during answer generation (OpenRouter): {e}")
 
 
 
227
  return None
228
 
229
  async def generate_answer_simple(self, prompt: str) -> Optional[str]:
230
  """
231
  Generate an answer using a simple prompt format.
 
 
 
 
 
 
232
  """
233
  try:
234
  response = await client.chat.completions.create(
 
237
  {"role": "user", "content": prompt}
238
  ],
239
  temperature=self.temperature,
240
+ timeout=60
241
  )
242
 
243
+ answer = response.choices[0].message.content.strip()
244
+ logger.info(f"Simple answer generated with {response.model}")
245
  return answer
246
 
 
 
 
247
  except Exception as e:
248
+ logger.error(f"Error during simple answer generation (OpenRouter): {e}")
249
  return None
app/rag/retriever.py CHANGED
@@ -81,4 +81,30 @@ class VectorRetriever:
81
  return results
82
  except Exception as e:
83
  logger.error(f"Error retrieving vectors by ID: {e}")
84
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  return results
82
  except Exception as e:
83
  logger.error(f"Error retrieving vectors by ID: {e}")
84
+ return []
85
+
86
+
87
+
88
+
89
+
90
+
91
+
92
+
93
+
94
+
95
+
96
+
97
+
98
+
99
+
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+ # ==========================================================
110
+
app/services/__pycache__/agent_service.cpython-313.pyc CHANGED
Binary files a/app/services/__pycache__/agent_service.cpython-313.pyc and b/app/services/__pycache__/agent_service.cpython-313.pyc differ
 
app/services/agent_service.py CHANGED
@@ -7,8 +7,11 @@ import os
7
 
8
 
9
  logger = logging.getLogger(__name__)
10
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
11
 
 
 
 
 
12
 
13
  class AgentService:
14
  """Service class for the agent that intelligently routes queries based on context."""
@@ -83,7 +86,7 @@ Answer clearly and helpfully.
83
 
84
  try:
85
  response = client.chat.completions.create(
86
- model="gpt-4o-mini", # Ya gpt-3.5-turbo rakh
87
  messages=[
88
  {"role": "system", "content": "You are a helpful course assistant. Be accurate and friendly."},
89
  {"role": "user", "content": prompt}
@@ -157,7 +160,7 @@ Answer clearly and helpfully.
157
 
158
  try:
159
  response = client.chat.completions.create(
160
- model="gpt-4o-mini",
161
  messages=[
162
  {"role": "system", "content": "You are a friendly and helpful AI assistant."},
163
  {"role": "user", "content": request.question}
 
7
 
8
 
9
  logger = logging.getLogger(__name__)
 
10
 
11
# Sync client pointed at OpenRouter. The base_url override is what routes the
# standard OpenAI client through OpenRouter; env-overridable (default
# unchanged) for tests or alternative gateways.
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),  # holds the OpenRouter key
    base_url=os.getenv("OPENAI_BASE_URL", "https://openrouter.ai/api/v1"),
)
15
 
16
  class AgentService:
17
  """Service class for the agent that intelligently routes queries based on context."""
 
86
 
87
  try:
88
  response = client.chat.completions.create(
89
+ model="mistralai/devstral-2512:free", #
90
  messages=[
91
  {"role": "system", "content": "You are a helpful course assistant. Be accurate and friendly."},
92
  {"role": "user", "content": prompt}
 
160
 
161
  try:
162
  response = client.chat.completions.create(
163
+ model="mistralai/devstral-2512:free",
164
  messages=[
165
  {"role": "system", "content": "You are a friendly and helpful AI assistant."},
166
  {"role": "user", "content": request.question}
app/utils/__pycache__/embeddings.cpython-313.pyc CHANGED
Binary files a/app/utils/__pycache__/embeddings.cpython-313.pyc and b/app/utils/__pycache__/embeddings.cpython-313.pyc differ
 
app/utils/embeddings.py CHANGED
@@ -14,13 +14,10 @@ from dotenv import load_dotenv
14
  # Load environment variables
15
  load_dotenv()
16
 
17
- # Initialize the OpenAI client
18
- client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
19
-
20
- # Get the embedding model from environment variables, default to the one used in ingestion
21
- EMBEDDING_MODEL = os.getenv("OPENAI_EMBEDDING_MODEL", "text-embedding-ada-002")
22
-
23
-
24
  async def get_embeddings(texts: Union[str, List[str]]) -> List[List[float]]:
25
  """
26
  Convert text(s) to embeddings using the OpenAI embedding model.
@@ -38,7 +35,7 @@ async def get_embeddings(texts: Union[str, List[str]]) -> List[List[float]]:
38
  # Create embeddings using OpenAI API
39
  response = await client.embeddings.create(
40
  input=texts,
41
- model=EMBEDDING_MODEL
42
  )
43
 
44
  # Extract and return the embeddings
 
14
  # Load environment variables
15
  load_dotenv()
16
 
17
# Async client pointed at OpenRouter (required for embeddings as well as
# chat). base_url is env-overridable with the default unchanged, so tests or
# another OpenAI-compatible gateway can be targeted without code edits.
client = AsyncOpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),  # holds the OpenRouter key
    base_url=os.getenv("OPENAI_BASE_URL", "https://openrouter.ai/api/v1"),
)
 
 
 
21
  async def get_embeddings(texts: Union[str, List[str]]) -> List[List[float]]:
22
  """
23
  Convert text(s) to embeddings using the OpenAI embedding model.
 
35
  # Create embeddings using OpenAI API
36
  response = await client.embeddings.create(
37
  input=texts,
38
+ model = os.getenv("OPENAI_EMBEDDING_MODEL", "openai/text-embedding-3-small") # Use the model from env variable
39
  )
40
 
41
  # Extract and return the embeddings
requirements.txt CHANGED
@@ -6,4 +6,6 @@ pydantic-settings
6
  asyncpg
7
  qdrant-client
8
  openai
9
- python-markdown
 
 
 
6
  asyncpg
7
  qdrant-client
8
  openai
9
+ python-markdown
10
+ langchain-cohere
11
+ cohere