cdpearlman Cursor committed on
Commit
ef3e36a
·
1 Parent(s): ddd91a5

Migrate from Gemini to OpenRouter API

Browse files

Co-authored-by: Cursor <cursoragent@cursor.com>

app.py CHANGED
@@ -1004,7 +1004,7 @@ def clear_chat_history(n_clicks):
1004
  def send_chat_message(send_clicks, user_input, chat_history,
1005
  model_name, prompt, activation_data, ablated_heads):
1006
  """Handle sending a chat message and getting AI response."""
1007
- from utils.gemini_client import generate_response
1008
  from utils.rag_utils import build_rag_context
1009
 
1010
  if not user_input or not user_input.strip():
 
1004
  def send_chat_message(send_clicks, user_input, chat_history,
1005
  model_name, prompt, activation_data, ablated_heads):
1006
  """Handle sending a chat message and getting AI response."""
1007
+ from utils.openrouter_client import generate_response
1008
  from utils.rag_utils import build_rag_context
1009
 
1010
  if not user_input or not user_input.strip():
requirements.txt CHANGED
@@ -17,5 +17,6 @@ numpy>=1.24.0
17
  # Testing dependencies
18
  pytest>=7.0.0
19
 
20
- # AI Chatbot dependencies
21
- google-genai>=1.0.0
 
 
17
  # Testing dependencies
18
  pytest>=7.0.0
19
 
20
+ # AI Chatbot dependencies (OpenRouter API)
21
+ requests>=2.28.0
22
+ python-dotenv>=1.0.0
tests/test_gemini_connection.py DELETED
@@ -1,84 +0,0 @@
1
- """
2
- Tests for Gemini API connection.
3
-
4
- Verifies that the API key is configured correctly and can connect
5
- to the Gemini API without consuming generation tokens.
6
-
7
- Uses the new google-genai SDK.
8
- """
9
-
10
- import os
11
- import pytest
12
- from dotenv import load_dotenv
13
-
14
- # Load environment variables for tests
15
- load_dotenv()
16
-
17
-
18
- class TestGeminiConnection:
19
- """Test suite for Gemini API connectivity."""
20
-
21
- def test_api_key_is_set(self):
22
- """Verify GEMINI_API_KEY environment variable is configured."""
23
- api_key = os.environ.get("GEMINI_API_KEY")
24
- assert api_key is not None, "GEMINI_API_KEY environment variable is not set"
25
- assert len(api_key) > 0, "GEMINI_API_KEY is empty"
26
- # Basic format check (Gemini keys are typically 39+ characters)
27
- assert len(api_key) > 10, "GEMINI_API_KEY appears too short to be valid"
28
-
29
- @pytest.mark.timeout(30)
30
- def test_can_list_models(self):
31
- """
32
- Test API connectivity by listing available models.
33
- This verifies the API key is valid without consuming generation tokens.
34
- """
35
- from google import genai
36
-
37
- api_key = os.environ.get("GEMINI_API_KEY")
38
- client = genai.Client(api_key=api_key)
39
-
40
- # List models - this is a read-only API call that validates the key
41
- models = list(client.models.list())
42
-
43
- assert len(models) > 0, "No models returned - API key may be invalid"
44
-
45
- # Verify we can see generation models
46
- model_names = [m.name for m in models]
47
- has_gemini_model = any("gemini" in name.lower() for name in model_names)
48
- assert has_gemini_model, "No Gemini models found in available models list"
49
-
50
- @pytest.mark.timeout(30)
51
- def test_flash_model_available(self):
52
- """Verify a Gemini Flash model (used by default) is available."""
53
- from google import genai
54
-
55
- api_key = os.environ.get("GEMINI_API_KEY")
56
- client = genai.Client(api_key=api_key)
57
-
58
- models = list(client.models.list())
59
- model_names = [m.name for m in models]
60
-
61
- # Check for flash model variants (our default is gemini-2.0-flash)
62
- has_flash_model = any("flash" in name.lower() for name in model_names)
63
- assert has_flash_model, (
64
- f"No Gemini Flash models available. "
65
- f"Available models: {model_names[:10]}..."
66
- )
67
-
68
- @pytest.mark.timeout(30)
69
- def test_embedding_model_available(self):
70
- """Verify the embedding model is available."""
71
- from google import genai
72
-
73
- api_key = os.environ.get("GEMINI_API_KEY")
74
- client = genai.Client(api_key=api_key)
75
-
76
- models = list(client.models.list())
77
- model_names = [m.name for m in models]
78
-
79
- # Check for embedding model (gemini-embedding-001)
80
- has_embedding_model = any("embedding" in name.lower() for name in model_names)
81
- assert has_embedding_model, (
82
- f"No embedding models available. "
83
- f"Available models: {model_names[:10]}..."
84
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tests/test_openrouter_connection.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for OpenRouter API connection.
3
+
4
+ Verifies that the API key is configured correctly and can connect
5
+ to the OpenRouter API without consuming many tokens.
6
+ """
7
+
8
+ import os
9
+ import pytest
10
+ import requests
11
+ from dotenv import load_dotenv
12
+
13
+ # Load environment variables for tests
14
+ load_dotenv()
15
+
16
+ # OpenRouter API endpoint
17
+ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
18
+
19
+
20
+ class TestOpenRouterConnection:
21
+ """Test suite for OpenRouter API connectivity."""
22
+
23
+ def test_api_key_is_set(self):
24
+ """Verify OPENROUTER_API_KEY environment variable is configured."""
25
+ api_key = os.environ.get("OPENROUTER_API_KEY")
26
+ assert api_key is not None, "OPENROUTER_API_KEY environment variable is not set"
27
+ assert len(api_key) > 0, "OPENROUTER_API_KEY is empty"
28
+ # OpenRouter keys start with "sk-or-"
29
+ assert len(api_key) > 10, "OPENROUTER_API_KEY appears too short to be valid"
30
+
31
+ @pytest.mark.timeout(30)
32
+ def test_can_list_models(self):
33
+ """
34
+ Test API connectivity by listing available models.
35
+ This verifies the API key is valid without consuming generation tokens.
36
+ """
37
+ api_key = os.environ.get("OPENROUTER_API_KEY")
38
+
39
+ headers = {
40
+ "Authorization": f"Bearer {api_key}",
41
+ "Content-Type": "application/json"
42
+ }
43
+
44
+ response = requests.get(
45
+ f"{OPENROUTER_BASE_URL}/models",
46
+ headers=headers,
47
+ timeout=30
48
+ )
49
+ response.raise_for_status()
50
+
51
+ data = response.json()
52
+ models = data.get("data", [])
53
+
54
+ assert len(models) > 0, "No models returned - API key may be invalid"
55
+
56
+ # Verify we can see some models
57
+ model_ids = [m.get("id", "") for m in models]
58
+ assert len(model_ids) > 0, "No model IDs found in available models list"
59
+
60
+ @pytest.mark.timeout(30)
61
+ def test_chat_model_available(self):
62
+ """Verify the configured chat model is available."""
63
+ from utils.openrouter_client import DEFAULT_CHAT_MODEL
64
+
65
+ api_key = os.environ.get("OPENROUTER_API_KEY")
66
+
67
+ headers = {
68
+ "Authorization": f"Bearer {api_key}",
69
+ "Content-Type": "application/json"
70
+ }
71
+
72
+ response = requests.get(
73
+ f"{OPENROUTER_BASE_URL}/models",
74
+ headers=headers,
75
+ timeout=30
76
+ )
77
+ response.raise_for_status()
78
+
79
+ data = response.json()
80
+ models = data.get("data", [])
81
+ model_ids = [m.get("id", "") for m in models]
82
+
83
+ # Check for the configured model or similar
84
+ model_family = DEFAULT_CHAT_MODEL.split("/")[0] if "/" in DEFAULT_CHAT_MODEL else DEFAULT_CHAT_MODEL
85
+ has_model = any(model_family in mid for mid in model_ids)
86
+
87
+ assert has_model or DEFAULT_CHAT_MODEL in model_ids, (
88
+ f"Chat model '{DEFAULT_CHAT_MODEL}' or similar not found. "
89
+ f"Sample available models: {model_ids[:10]}..."
90
+ )
91
+
92
+ @pytest.mark.timeout(30)
93
+ def test_embedding_model_available(self):
94
+ """Verify an embedding model is available."""
95
+ api_key = os.environ.get("OPENROUTER_API_KEY")
96
+
97
+ headers = {
98
+ "Authorization": f"Bearer {api_key}",
99
+ "Content-Type": "application/json"
100
+ }
101
+
102
+ response = requests.get(
103
+ f"{OPENROUTER_BASE_URL}/models",
104
+ headers=headers,
105
+ timeout=30
106
+ )
107
+ response.raise_for_status()
108
+
109
+ data = response.json()
110
+ models = data.get("data", [])
111
+ model_ids = [m.get("id", "") for m in models]
112
+
113
+ # Check for embedding model
114
+ has_embedding_model = any("embedding" in mid.lower() for mid in model_ids)
115
+
116
+ # OpenRouter may not list embedding models separately, so this is a soft check
117
+ if not has_embedding_model:
118
+ print(f"Note: No embedding models explicitly listed. Available: {model_ids[:10]}...")
todo.md CHANGED
@@ -151,15 +151,22 @@
151
  - [x] Tests verify: API key is set, can list models, flash model available
152
  - Note: On Hugging Face Spaces, set `GEMINI_API_KEY` in Repository Secrets
153
 
154
- ## Completed: Migrate to New Google GenAI SDK
155
 
156
  - [x] Update `requirements.txt`: `google-generativeai` → `google-genai>=1.0.0`
157
  - [x] Rewrite `utils/gemini_client.py` using new centralized Client architecture
158
- - New import: `from google import genai` and `from google.genai import types`
159
- - Client-based API: `client = genai.Client(api_key=...)`
160
- - Chat via: `client.chats.create(model=..., config=..., history=...)`
161
- - Embeddings via: `client.models.embed_content(model=..., contents=..., config=...)`
162
- - [x] Update embedding model: `models/text-embedding-004` → `gemini-embedding-001`
163
- - [x] Update `tests/test_gemini_connection.py` to use new SDK
164
  - [x] All 4 connection tests pass
165
  - [x] Verified: embeddings work (3072 dimensions), chat generation works
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  - [x] Tests verify: API key is set, can list models, flash model available
152
  - Note: On Hugging Face Spaces, set `GEMINI_API_KEY` in Repository Secrets
153
 
154
+ ## Completed: Migrate to New Google GenAI SDK (Superseded)
155
 
156
  - [x] Update `requirements.txt`: `google-generativeai` → `google-genai>=1.0.0`
157
  - [x] Rewrite `utils/gemini_client.py` using new centralized Client architecture
 
 
 
 
 
 
158
  - [x] All 4 connection tests pass
159
  - [x] Verified: embeddings work (3072 dimensions), chat generation works
160
+
161
+ ## Completed: Migrate from Gemini to OpenRouter
162
+
163
+ - [x] Create `utils/openrouter_client.py` with OpenAI-compatible API
164
+ - Global model config: `DEFAULT_CHAT_MODEL` and `DEFAULT_EMBEDDING_MODEL`
165
+ - Chat via: `POST /api/v1/chat/completions`
166
+ - Embeddings via: `POST /api/v1/embeddings`
167
+ - [x] Update `utils/rag_utils.py` imports to use openrouter_client
168
+ - [x] Update `app.py` imports to use openrouter_client
169
+ - [x] Create `tests/test_openrouter_connection.py` for API connectivity tests
170
+ - [x] Delete old `utils/gemini_client.py` and `tests/test_gemini_connection.py`
171
+ - [x] Update `requirements.txt`: remove `google-genai`, add `requests>=2.28.0`
172
+ - [x] Environment variable: `GEMINI_API_KEY` → `OPENROUTER_API_KEY`
utils/{gemini_client.py → openrouter_client.py} RENAMED
@@ -1,21 +1,29 @@
1
  """
2
- Gemini API Client
3
 
4
- Wrapper for Google Gemini API providing text generation and embedding capabilities
5
  for the AI chatbot feature.
6
 
7
- Uses the new google-genai SDK (migrated from deprecated google-generativeai).
8
  """
9
 
10
  import os
 
11
  from typing import List, Dict, Optional
12
- from google import genai
13
- from google.genai import types
14
 
15
 
16
- # Default model configuration
17
- DEFAULT_GENERATION_MODEL = "gemini-2.0-flash"
18
- DEFAULT_EMBEDDING_MODEL = "gemini-embedding-001"
 
 
 
 
 
 
 
 
 
19
 
20
  # System prompt for the chatbot
21
  SYSTEM_PROMPT = """You are a helpful AI assistant integrated into a Transformer Explanation Dashboard.
@@ -36,39 +44,38 @@ When answering:
36
  Dashboard context will be provided in the user's messages when available."""
37
 
38
 
39
- class GeminiClient:
40
- """Client for interacting with Google Gemini API."""
41
 
42
  def __init__(self, api_key: Optional[str] = None):
43
  """
44
- Initialize the Gemini client.
45
 
46
  Args:
47
- api_key: Gemini API key. If not provided, reads from GEMINI_API_KEY env var.
48
  """
49
- self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
50
  self._initialized = False
51
- self._client = None
52
 
53
  if self.api_key:
54
  self._initialize()
55
 
56
  def _initialize(self):
57
- """Initialize the Gemini API client."""
58
  if not self.api_key:
59
  return
60
 
61
- try:
62
- # Create the centralized client object (new SDK architecture)
63
- self._client = genai.Client(api_key=self.api_key)
64
- self._initialized = True
65
- except Exception as e:
66
- print(f"Error initializing Gemini client: {e}")
67
- self._initialized = False
68
 
69
  @property
70
  def is_available(self) -> bool:
71
- """Check if the Gemini API is available and configured."""
72
  return self._initialized and self.api_key is not None
73
 
74
  def generate_response(
@@ -79,7 +86,7 @@ class GeminiClient:
79
  dashboard_context: Optional[Dict] = None
80
  ) -> str:
81
  """
82
- Generate a response using Gemini.
83
 
84
  Args:
85
  user_message: The user's message
@@ -91,43 +98,61 @@ class GeminiClient:
91
  Generated response text
92
  """
93
  if not self.is_available:
94
- return "Sorry, the AI assistant is not available. Please check that the GEMINI_API_KEY environment variable is set."
95
 
96
  try:
97
  # Build the full prompt with context
98
  full_message = self._build_prompt(user_message, rag_context, dashboard_context)
99
 
100
- # Convert chat history to new SDK format
101
- history = []
 
 
102
  if chat_history:
103
  for msg in chat_history[-10:]: # Keep last 10 messages for context
104
- role = "user" if msg.get("role") == "user" else "model"
105
- history.append({
106
  "role": role,
107
- "parts": [{"text": msg.get("content", "")}]
108
  })
109
 
110
- # Create chat session with system instruction and send message
111
- chat = self._client.chats.create(
112
- model=DEFAULT_GENERATION_MODEL,
113
- config=types.GenerateContentConfig(
114
- system_instruction=SYSTEM_PROMPT,
115
- ),
116
- history=history
 
 
 
 
 
117
  )
118
- response = chat.send_message(message=full_message)
119
 
120
- return response.text
 
121
 
122
- except Exception as e:
123
  error_msg = str(e)
124
- if "quota" in error_msg.lower() or "rate" in error_msg.lower():
 
 
 
 
 
 
 
125
  return f"The AI service is currently rate limited. Please try again in a moment. {error_msg}"
126
- elif "invalid" in error_msg.lower() and "key" in error_msg.lower():
127
- return "Invalid API key. Please check your GEMINI_API_KEY configuration."
128
  else:
129
- print(f"Gemini API error: {e}")
130
  return f"Sorry, I encountered an error: {error_msg}"
 
 
 
131
 
132
  def _build_prompt(
133
  self,
@@ -180,7 +205,7 @@ class GeminiClient:
180
 
181
  def get_embedding(self, text: str) -> Optional[List[float]]:
182
  """
183
- Get embedding vector for text using Gemini Embedding API.
184
 
185
  Args:
186
  text: Text to embed
@@ -192,22 +217,29 @@ class GeminiClient:
192
  return None
193
 
194
  try:
195
- result = self._client.models.embed_content(
196
- model=DEFAULT_EMBEDDING_MODEL,
197
- contents=text,
198
- config=types.EmbedContentConfig(
199
- task_type="RETRIEVAL_DOCUMENT"
200
- )
 
 
201
  )
202
- # New SDK returns embeddings as a list, get the first one
203
- return result.embeddings[0].values
 
 
204
  except Exception as e:
205
  print(f"Embedding error: {e}")
206
  return None
207
 
208
  def get_query_embedding(self, query: str) -> Optional[List[float]]:
209
  """
210
- Get embedding vector for a query (uses different task type for better retrieval).
 
 
 
211
 
212
  Args:
213
  query: Query text to embed
@@ -215,33 +247,18 @@ class GeminiClient:
215
  Returns:
216
  Embedding vector as list of floats, or None if failed
217
  """
218
- if not self.is_available:
219
- return None
220
-
221
- try:
222
- result = self._client.models.embed_content(
223
- model=DEFAULT_EMBEDDING_MODEL,
224
- contents=query,
225
- config=types.EmbedContentConfig(
226
- task_type="RETRIEVAL_QUERY"
227
- )
228
- )
229
- # New SDK returns embeddings as a list, get the first one
230
- return result.embeddings[0].values
231
- except Exception as e:
232
- print(f"Query embedding error: {e}")
233
- return None
234
 
235
 
236
  # Singleton instance
237
- _client_instance: Optional[GeminiClient] = None
238
 
239
 
240
- def get_gemini_client() -> GeminiClient:
241
- """Get or create the singleton Gemini client instance."""
242
  global _client_instance
243
  if _client_instance is None:
244
- _client_instance = GeminiClient()
245
  return _client_instance
246
 
247
 
@@ -263,17 +280,22 @@ def generate_response(
263
  Returns:
264
  Generated response text
265
  """
266
- client = get_gemini_client()
267
  return client.generate_response(user_message, chat_history, rag_context, dashboard_context)
268
 
269
 
270
  def get_embedding(text: str) -> Optional[List[float]]:
271
  """Convenience function to get document embedding."""
272
- client = get_gemini_client()
273
  return client.get_embedding(text)
274
 
275
 
276
  def get_query_embedding(query: str) -> Optional[List[float]]:
277
  """Convenience function to get query embedding."""
278
- client = get_gemini_client()
279
  return client.get_query_embedding(query)
 
 
 
 
 
 
1
  """
2
+ OpenRouter API Client
3
 
4
+ Wrapper for OpenRouter API providing text generation and embedding capabilities
5
  for the AI chatbot feature.
6
 
7
+ Uses the OpenAI-compatible API via requests.
8
  """
9
 
10
  import os
11
+ import requests
12
  from typing import List, Dict, Optional
 
 
13
 
14
 
15
+ # =============================================================================
16
+ # GLOBAL MODEL CONFIGURATION
17
+ # =============================================================================
18
+ # Change these to switch models across the entire application
19
+
20
+ DEFAULT_CHAT_MODEL = "google/gemini-2.0-flash-001"
21
+ DEFAULT_EMBEDDING_MODEL = "openai/text-embedding-3-small"
22
+
23
+ # =============================================================================
24
+
25
+ # OpenRouter API endpoint
26
+ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
27
 
28
  # System prompt for the chatbot
29
  SYSTEM_PROMPT = """You are a helpful AI assistant integrated into a Transformer Explanation Dashboard.
 
44
  Dashboard context will be provided in the user's messages when available."""
45
 
46
 
47
+ class OpenRouterClient:
48
+ """Client for interacting with OpenRouter API."""
49
 
50
  def __init__(self, api_key: Optional[str] = None):
51
  """
52
+ Initialize the OpenRouter client.
53
 
54
  Args:
55
+ api_key: OpenRouter API key. If not provided, reads from OPENROUTER_API_KEY env var.
56
  """
57
+ self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY")
58
  self._initialized = False
 
59
 
60
  if self.api_key:
61
  self._initialize()
62
 
63
  def _initialize(self):
64
+ """Initialize the OpenRouter API client."""
65
  if not self.api_key:
66
  return
67
 
68
+ self._headers = {
69
+ "Authorization": f"Bearer {self.api_key}",
70
+ "Content-Type": "application/json",
71
+ "HTTP-Referer": "https://transformer-dashboard.local", # Optional: for rankings
72
+ "X-Title": "Transformer Explanation Dashboard" # Optional: for rankings
73
+ }
74
+ self._initialized = True
75
 
76
  @property
77
  def is_available(self) -> bool:
78
+ """Check if the OpenRouter API is available and configured."""
79
  return self._initialized and self.api_key is not None
80
 
81
  def generate_response(
 
86
  dashboard_context: Optional[Dict] = None
87
  ) -> str:
88
  """
89
+ Generate a response using OpenRouter.
90
 
91
  Args:
92
  user_message: The user's message
 
98
  Generated response text
99
  """
100
  if not self.is_available:
101
+ return "Sorry, the AI assistant is not available. Please check that the OPENROUTER_API_KEY environment variable is set."
102
 
103
  try:
104
  # Build the full prompt with context
105
  full_message = self._build_prompt(user_message, rag_context, dashboard_context)
106
 
107
+ # Build messages array with system prompt and history
108
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
109
+
110
+ # Add chat history
111
  if chat_history:
112
  for msg in chat_history[-10:]: # Keep last 10 messages for context
113
+ role = "user" if msg.get("role") == "user" else "assistant"
114
+ messages.append({
115
  "role": role,
116
+ "content": msg.get("content", "")
117
  })
118
 
119
+ # Add the current user message
120
+ messages.append({"role": "user", "content": full_message})
121
+
122
+ # Make API request
123
+ response = requests.post(
124
+ f"{OPENROUTER_BASE_URL}/chat/completions",
125
+ headers=self._headers,
126
+ json={
127
+ "model": DEFAULT_CHAT_MODEL,
128
+ "messages": messages
129
+ },
130
+ timeout=60
131
  )
132
+ response.raise_for_status()
133
 
134
+ data = response.json()
135
+ return data["choices"][0]["message"]["content"]
136
 
137
+ except requests.exceptions.HTTPError as e:
138
  error_msg = str(e)
139
+ if e.response is not None:
140
+ try:
141
+ error_data = e.response.json()
142
+ error_msg = error_data.get("error", {}).get("message", str(e))
143
+ except:
144
+ pass
145
+
146
+ if "rate" in error_msg.lower() or "429" in error_msg:
147
  return f"The AI service is currently rate limited. Please try again in a moment. {error_msg}"
148
+ elif "401" in error_msg or "invalid" in error_msg.lower():
149
+ return "Invalid API key. Please check your OPENROUTER_API_KEY configuration."
150
  else:
151
+ print(f"OpenRouter API error: {e}")
152
  return f"Sorry, I encountered an error: {error_msg}"
153
+ except Exception as e:
154
+ print(f"OpenRouter API error: {e}")
155
+ return f"Sorry, I encountered an error: {str(e)}"
156
 
157
  def _build_prompt(
158
  self,
 
205
 
206
  def get_embedding(self, text: str) -> Optional[List[float]]:
207
  """
208
+ Get embedding vector for text using OpenRouter Embedding API.
209
 
210
  Args:
211
  text: Text to embed
 
217
  return None
218
 
219
  try:
220
+ response = requests.post(
221
+ f"{OPENROUTER_BASE_URL}/embeddings",
222
+ headers=self._headers,
223
+ json={
224
+ "model": DEFAULT_EMBEDDING_MODEL,
225
+ "input": text
226
+ },
227
+ timeout=30
228
  )
229
+ response.raise_for_status()
230
+
231
+ data = response.json()
232
+ return data["data"][0]["embedding"]
233
  except Exception as e:
234
  print(f"Embedding error: {e}")
235
  return None
236
 
237
  def get_query_embedding(self, query: str) -> Optional[List[float]]:
238
  """
239
+ Get embedding vector for a query.
240
+
241
+ Note: OpenRouter doesn't have separate task types for embeddings,
242
+ so this calls the same endpoint as get_embedding.
243
 
244
  Args:
245
  query: Query text to embed
 
247
  Returns:
248
  Embedding vector as list of floats, or None if failed
249
  """
250
+ return self.get_embedding(query)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
 
252
 
253
  # Singleton instance
254
+ _client_instance: Optional[OpenRouterClient] = None
255
 
256
 
257
+ def get_openrouter_client() -> OpenRouterClient:
258
+ """Get or create the singleton OpenRouter client instance."""
259
  global _client_instance
260
  if _client_instance is None:
261
+ _client_instance = OpenRouterClient()
262
  return _client_instance
263
 
264
 
 
280
  Returns:
281
  Generated response text
282
  """
283
+ client = get_openrouter_client()
284
  return client.generate_response(user_message, chat_history, rag_context, dashboard_context)
285
 
286
 
287
  def get_embedding(text: str) -> Optional[List[float]]:
288
  """Convenience function to get document embedding."""
289
+ client = get_openrouter_client()
290
  return client.get_embedding(text)
291
 
292
 
293
  def get_query_embedding(query: str) -> Optional[List[float]]:
294
  """Convenience function to get query embedding."""
295
+ client = get_openrouter_client()
296
  return client.get_query_embedding(query)
297
+
298
+
299
+ # Backward compatibility aliases (for gradual migration)
300
+ GeminiClient = OpenRouterClient
301
+ get_gemini_client = get_openrouter_client
utils/rag_utils.py CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
11
  from typing import List, Dict, Optional, Tuple
12
  import numpy as np
13
 
14
- from utils.gemini_client import get_embedding, get_query_embedding, get_gemini_client
15
 
16
 
17
  # Configuration
@@ -189,9 +189,9 @@ class RAGService:
189
  if not self._loaded:
190
  self.load_documents()
191
 
192
- client = get_gemini_client()
193
  if not client.is_available:
194
- print("Gemini client not available, skipping embedding generation")
195
  return 0
196
 
197
  embedded_count = 0
 
11
  from typing import List, Dict, Optional, Tuple
12
  import numpy as np
13
 
14
+ from utils.openrouter_client import get_embedding, get_query_embedding, get_openrouter_client
15
 
16
 
17
  # Configuration
 
189
  if not self._loaded:
190
  self.load_documents()
191
 
192
+ client = get_openrouter_client()
193
  if not client.is_available:
194
+ print("OpenRouter client not available, skipping embedding generation")
195
  return 0
196
 
197
  embedded_count = 0