Commit ef3e36a · Parent(s): ddd91a5

Migrate from Gemini to OpenRouter API

Co-authored-by: Cursor <cursoragent@cursor.com>
- app.py +1 -1
- requirements.txt +3 -2
- tests/test_gemini_connection.py +0 -84
- tests/test_openrouter_connection.py +118 -0
- todo.md +14 -7
- utils/{gemini_client.py → openrouter_client.py} +99 -77
- utils/rag_utils.py +3 -3
app.py
CHANGED
@@ -1004,7 +1004,7 @@ def clear_chat_history(n_clicks):
 def send_chat_message(send_clicks, user_input, chat_history,
                       model_name, prompt, activation_data, ablated_heads):
     """Handle sending a chat message and getting AI response."""
-    from utils.gemini_client import generate_response
+    from utils.openrouter_client import generate_response
     from utils.rag_utils import build_rag_context
 
     if not user_input or not user_input.strip():
requirements.txt
CHANGED
@@ -17,5 +17,6 @@ numpy>=1.24.0
 # Testing dependencies
 pytest>=7.0.0
 
-# AI Chatbot dependencies
-google-genai>=1.0.0
+# AI Chatbot dependencies (OpenRouter API)
+requests>=2.28.0
+python-dotenv>=1.0.0
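The two new runtime dependencies are all the OpenRouter integration needs: python-dotenv loads the key from a local .env file and requests performs the HTTP calls. A minimal sketch of a standalone key check under that assumption (not part of the commit; it mirrors what the new connection tests below do):

# Hypothetical smoke test for the new dependencies.
import os

import requests
from dotenv import load_dotenv

load_dotenv()  # picks up OPENROUTER_API_KEY from a local .env file
api_key = os.environ["OPENROUTER_API_KEY"]

# Listing models is a read-only call that validates the key without spending tokens.
resp = requests.get(
    "https://openrouter.ai/api/v1/models",
    headers={"Authorization": f"Bearer {api_key}"},
    timeout=30,
)
resp.raise_for_status()
print(f"API key OK, {len(resp.json()['data'])} models visible")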
tests/test_gemini_connection.py
DELETED
@@ -1,84 +0,0 @@
-"""
-Tests for Gemini API connection.
-
-Verifies that the API key is configured correctly and can connect
-to the Gemini API without consuming generation tokens.
-
-Uses the new google-genai SDK.
-"""
-
-import os
-import pytest
-from dotenv import load_dotenv
-
-# Load environment variables for tests
-load_dotenv()
-
-
-class TestGeminiConnection:
-    """Test suite for Gemini API connectivity."""
-
-    def test_api_key_is_set(self):
-        """Verify GEMINI_API_KEY environment variable is configured."""
-        api_key = os.environ.get("GEMINI_API_KEY")
-        assert api_key is not None, "GEMINI_API_KEY environment variable is not set"
-        assert len(api_key) > 0, "GEMINI_API_KEY is empty"
-        # Basic format check (Gemini keys are typically 39+ characters)
-        assert len(api_key) > 10, "GEMINI_API_KEY appears too short to be valid"
-
-    @pytest.mark.timeout(30)
-    def test_can_list_models(self):
-        """
-        Test API connectivity by listing available models.
-        This verifies the API key is valid without consuming generation tokens.
-        """
-        from google import genai
-
-        api_key = os.environ.get("GEMINI_API_KEY")
-        client = genai.Client(api_key=api_key)
-
-        # List models - this is a read-only API call that validates the key
-        models = list(client.models.list())
-
-        assert len(models) > 0, "No models returned - API key may be invalid"
-
-        # Verify we can see generation models
-        model_names = [m.name for m in models]
-        has_gemini_model = any("gemini" in name.lower() for name in model_names)
-        assert has_gemini_model, "No Gemini models found in available models list"
-
-    @pytest.mark.timeout(30)
-    def test_flash_model_available(self):
-        """Verify a Gemini Flash model (used by default) is available."""
-        from google import genai
-
-        api_key = os.environ.get("GEMINI_API_KEY")
-        client = genai.Client(api_key=api_key)
-
-        models = list(client.models.list())
-        model_names = [m.name for m in models]
-
-        # Check for flash model variants (our default is gemini-2.0-flash)
-        has_flash_model = any("flash" in name.lower() for name in model_names)
-        assert has_flash_model, (
-            f"No Gemini Flash models available. "
-            f"Available models: {model_names[:10]}..."
-        )
-
-    @pytest.mark.timeout(30)
-    def test_embedding_model_available(self):
-        """Verify the embedding model is available."""
-        from google import genai
-
-        api_key = os.environ.get("GEMINI_API_KEY")
-        client = genai.Client(api_key=api_key)
-
-        models = list(client.models.list())
-        model_names = [m.name for m in models]
-
-        # Check for embedding model (gemini-embedding-001)
-        has_embedding_model = any("embedding" in name.lower() for name in model_names)
-        assert has_embedding_model, (
-            f"No embedding models available. "
-            f"Available models: {model_names[:10]}..."
-        )
tests/test_openrouter_connection.py
ADDED
@@ -0,0 +1,118 @@
+"""
+Tests for OpenRouter API connection.
+
+Verifies that the API key is configured correctly and can connect
+to the OpenRouter API without consuming many tokens.
+"""
+
+import os
+import pytest
+import requests
+from dotenv import load_dotenv
+
+# Load environment variables for tests
+load_dotenv()
+
+# OpenRouter API endpoint
+OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+
+
+class TestOpenRouterConnection:
+    """Test suite for OpenRouter API connectivity."""
+
+    def test_api_key_is_set(self):
+        """Verify OPENROUTER_API_KEY environment variable is configured."""
+        api_key = os.environ.get("OPENROUTER_API_KEY")
+        assert api_key is not None, "OPENROUTER_API_KEY environment variable is not set"
+        assert len(api_key) > 0, "OPENROUTER_API_KEY is empty"
+        # OpenRouter keys start with "sk-or-"
+        assert len(api_key) > 10, "OPENROUTER_API_KEY appears too short to be valid"
+
+    @pytest.mark.timeout(30)
+    def test_can_list_models(self):
+        """
+        Test API connectivity by listing available models.
+        This verifies the API key is valid without consuming generation tokens.
+        """
+        api_key = os.environ.get("OPENROUTER_API_KEY")
+
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+
+        response = requests.get(
+            f"{OPENROUTER_BASE_URL}/models",
+            headers=headers,
+            timeout=30
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        models = data.get("data", [])
+
+        assert len(models) > 0, "No models returned - API key may be invalid"
+
+        # Verify we can see some models
+        model_ids = [m.get("id", "") for m in models]
+        assert len(model_ids) > 0, "No model IDs found in available models list"
+
+    @pytest.mark.timeout(30)
+    def test_chat_model_available(self):
+        """Verify the configured chat model is available."""
+        from utils.openrouter_client import DEFAULT_CHAT_MODEL
+
+        api_key = os.environ.get("OPENROUTER_API_KEY")
+
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+
+        response = requests.get(
+            f"{OPENROUTER_BASE_URL}/models",
+            headers=headers,
+            timeout=30
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        models = data.get("data", [])
+        model_ids = [m.get("id", "") for m in models]
+
+        # Check for the configured model or similar
+        model_family = DEFAULT_CHAT_MODEL.split("/")[0] if "/" in DEFAULT_CHAT_MODEL else DEFAULT_CHAT_MODEL
+        has_model = any(model_family in mid for mid in model_ids)
+
+        assert has_model or DEFAULT_CHAT_MODEL in model_ids, (
+            f"Chat model '{DEFAULT_CHAT_MODEL}' or similar not found. "
+            f"Sample available models: {model_ids[:10]}..."
+        )
+
+    @pytest.mark.timeout(30)
+    def test_embedding_model_available(self):
+        """Verify an embedding model is available."""
+        api_key = os.environ.get("OPENROUTER_API_KEY")
+
+        headers = {
+            "Authorization": f"Bearer {api_key}",
+            "Content-Type": "application/json"
+        }
+
+        response = requests.get(
+            f"{OPENROUTER_BASE_URL}/models",
+            headers=headers,
+            timeout=30
+        )
+        response.raise_for_status()
+
+        data = response.json()
+        models = data.get("data", [])
+        model_ids = [m.get("id", "") for m in models]
+
+        # Check for embedding model
+        has_embedding_model = any("embedding" in mid.lower() for mid in model_ids)
+
+        # OpenRouter may not list embedding models separately, so this is a soft check
+        if not has_embedding_model:
+            print(f"Note: No embedding models explicitly listed. Available: {model_ids[:10]}...")
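As a usage note, these connection tests can be run on their own once OPENROUTER_API_KEY is available from the environment or a local .env file; the @pytest.mark.timeout markers assume the pytest-timeout plugin is installed. A sketch of a one-off runner:

# Hypothetical runner for just the OpenRouter connection tests.
import sys

import pytest

# -v gives per-test output; exit code is non-zero if any connectivity check fails.
sys.exit(pytest.main(["tests/test_openrouter_connection.py", "-v"]))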
todo.md
CHANGED
@@ -151,15 +151,22 @@
 - [x] Tests verify: API key is set, can list models, flash model available
 - Note: On Hugging Face Spaces, set `GEMINI_API_KEY` in Repository Secrets
 
-## Completed: Migrate to New Google GenAI SDK
+## Completed: Migrate to New Google GenAI SDK (Superseded)
 
 - [x] Update `requirements.txt`: `google-generativeai` → `google-genai>=1.0.0`
 - [x] Rewrite `utils/gemini_client.py` using new centralized Client architecture
-  - New import: `from google import genai` and `from google.genai import types`
-  - Client-based API: `client = genai.Client(api_key=...)`
-  - Chat via: `client.chats.create(model=..., config=..., history=...)`
-  - Embeddings via: `client.models.embed_content(model=..., contents=..., config=...)`
-- [x] Update embedding model: `models/text-embedding-004` → `gemini-embedding-001`
-- [x] Update `tests/test_gemini_connection.py` to use new SDK
 - [x] All 4 connection tests pass
 - [x] Verified: embeddings work (3072 dimensions), chat generation works
+
+## Completed: Migrate from Gemini to OpenRouter
+
+- [x] Create `utils/openrouter_client.py` with OpenAI-compatible API
+  - Global model config: `DEFAULT_CHAT_MODEL` and `DEFAULT_EMBEDDING_MODEL`
+  - Chat via: `POST /api/v1/chat/completions`
+  - Embeddings via: `POST /api/v1/embeddings`
+- [x] Update `utils/rag_utils.py` imports to use openrouter_client
+- [x] Update `app.py` imports to use openrouter_client
+- [x] Create `tests/test_openrouter_connection.py` for API connectivity tests
+- [x] Delete old `utils/gemini_client.py` and `tests/test_gemini_connection.py`
+- [x] Update `requirements.txt`: remove `google-genai`, add `requests>=2.28.0`
+- [x] Environment variable: `GEMINI_API_KEY` → `OPENROUTER_API_KEY`
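For reference, the two endpoints listed above take OpenAI-style JSON payloads; a hedged sketch of the request and response shapes the new client works with (the prompt and input strings are illustrative only):

# Illustrative payload shapes; the real requests are built in utils/openrouter_client.py.
chat_payload = {
    "model": "google/gemini-2.0-flash-001",    # DEFAULT_CHAT_MODEL
    "messages": [
        {"role": "system", "content": "You are a helpful AI assistant..."},
        {"role": "user", "content": "What does ablating attention head 3 do?"},
    ],
}
# POST {base}/chat/completions -> reply text at choices[0].message.content

embedding_payload = {
    "model": "openai/text-embedding-3-small",  # DEFAULT_EMBEDDING_MODEL
    "input": "Attention heads mix information between token positions.",
}
# POST {base}/embeddings -> vector at data[0].embedding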
utils/{gemini_client.py → openrouter_client.py}
RENAMED
@@ -1,21 +1,29 @@
 """
+OpenRouter API Client
 
+Wrapper for OpenRouter API providing text generation and embedding capabilities
 for the AI chatbot feature.
 
+Uses the OpenAI-compatible API via requests.
 """
 
 import os
+import requests
 from typing import List, Dict, Optional
-from google import genai
-from google.genai import types
 
 
+# =============================================================================
+# GLOBAL MODEL CONFIGURATION
+# =============================================================================
+# Change these to switch models across the entire application
+
+DEFAULT_CHAT_MODEL = "google/gemini-2.0-flash-001"
+DEFAULT_EMBEDDING_MODEL = "openai/text-embedding-3-small"
+
+# =============================================================================
+
+# OpenRouter API endpoint
+OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
 
 # System prompt for the chatbot
 SYSTEM_PROMPT = """You are a helpful AI assistant integrated into a Transformer Explanation Dashboard.
@@ -36,39 +44,38 @@ When answering:
 Dashboard context will be provided in the user's messages when available."""
 
 
-class GeminiClient:
+class OpenRouterClient:
+    """Client for interacting with OpenRouter API."""
 
     def __init__(self, api_key: Optional[str] = None):
         """
+        Initialize the OpenRouter client.
 
         Args:
+            api_key: OpenRouter API key. If not provided, reads from OPENROUTER_API_KEY env var.
         """
-        self.api_key = api_key or os.environ.get("GEMINI_API_KEY")
+        self.api_key = api_key or os.environ.get("OPENROUTER_API_KEY")
         self._initialized = False
-        self._client = None
 
         if self.api_key:
             self._initialize()
 
     def _initialize(self):
+        """Initialize the OpenRouter API client."""
         if not self.api_key:
             return
 
+        self._headers = {
+            "Authorization": f"Bearer {self.api_key}",
+            "Content-Type": "application/json",
+            "HTTP-Referer": "https://transformer-dashboard.local",  # Optional: for rankings
+            "X-Title": "Transformer Explanation Dashboard"  # Optional: for rankings
+        }
+        self._initialized = True
 
     @property
     def is_available(self) -> bool:
+        """Check if the OpenRouter API is available and configured."""
        return self._initialized and self.api_key is not None
 
     def generate_response(
@@ -79,7 +86,7 @@ class GeminiClient:
         dashboard_context: Optional[Dict] = None
     ) -> str:
         """
+        Generate a response using OpenRouter.
 
         Args:
             user_message: The user's message
@@ -91,43 +98,61 @@ class GeminiClient:
             Generated response text
         """
         if not self.is_available:
+            return "Sorry, the AI assistant is not available. Please check that the OPENROUTER_API_KEY environment variable is set."
 
         try:
             # Build the full prompt with context
             full_message = self._build_prompt(user_message, rag_context, dashboard_context)
 
+            # Build messages array with system prompt and history
+            messages = [{"role": "system", "content": SYSTEM_PROMPT}]
+
+            # Add chat history
             if chat_history:
                 for msg in chat_history[-10:]:  # Keep last 10 messages for context
+                    role = "user" if msg.get("role") == "user" else "assistant"
+                    messages.append({
                         "role": role,
+                        "content": msg.get("content", "")
                     })
 
+            # Add the current user message
+            messages.append({"role": "user", "content": full_message})
+
+            # Make API request
+            response = requests.post(
+                f"{OPENROUTER_BASE_URL}/chat/completions",
+                headers=self._headers,
+                json={
+                    "model": DEFAULT_CHAT_MODEL,
+                    "messages": messages
+                },
+                timeout=60
             )
+            response.raise_for_status()
 
+            data = response.json()
+            return data["choices"][0]["message"]["content"]
 
+        except requests.exceptions.HTTPError as e:
             error_msg = str(e)
+            if e.response is not None:
+                try:
+                    error_data = e.response.json()
+                    error_msg = error_data.get("error", {}).get("message", str(e))
+                except:
+                    pass
+
+            if "rate" in error_msg.lower() or "429" in error_msg:
                 return f"The AI service is currently rate limited. Please try again in a moment. {error_msg}"
+            elif "401" in error_msg or "invalid" in error_msg.lower():
+                return "Invalid API key. Please check your OPENROUTER_API_KEY configuration."
             else:
+                print(f"OpenRouter API error: {e}")
                 return f"Sorry, I encountered an error: {error_msg}"
+        except Exception as e:
+            print(f"OpenRouter API error: {e}")
+            return f"Sorry, I encountered an error: {str(e)}"
 
     def _build_prompt(
         self,
@@ -180,7 +205,7 @@ class GeminiClient:
 
     def get_embedding(self, text: str) -> Optional[List[float]]:
         """
+        Get embedding vector for text using OpenRouter Embedding API.
 
         Args:
             text: Text to embed
@@ -192,22 +217,29 @@ class GeminiClient:
             return None
 
         try:
+            response = requests.post(
+                f"{OPENROUTER_BASE_URL}/embeddings",
+                headers=self._headers,
+                json={
+                    "model": DEFAULT_EMBEDDING_MODEL,
+                    "input": text
+                },
+                timeout=30
            )
+            response.raise_for_status()
+
+            data = response.json()
+            return data["data"][0]["embedding"]
         except Exception as e:
             print(f"Embedding error: {e}")
             return None
 
     def get_query_embedding(self, query: str) -> Optional[List[float]]:
         """
+        Get embedding vector for a query.
+
+        Note: OpenRouter doesn't have separate task types for embeddings,
+        so this calls the same endpoint as get_embedding.
 
         Args:
             query: Query text to embed
@@ -215,33 +247,18 @@ class GeminiClient:
         Returns:
             Embedding vector as list of floats, or None if failed
         """
-        try:
-            result = self._client.models.embed_content(
-                model=DEFAULT_EMBEDDING_MODEL,
-                contents=query,
-                config=types.EmbedContentConfig(
-                    task_type="RETRIEVAL_QUERY"
-                )
-            )
-            # New SDK returns embeddings as a list, get the first one
-            return result.embeddings[0].values
-        except Exception as e:
-            print(f"Query embedding error: {e}")
-            return None
+        return self.get_embedding(query)
 
 
 # Singleton instance
-_client_instance: Optional[GeminiClient] = None
+_client_instance: Optional[OpenRouterClient] = None
 
 
-def get_gemini_client() -> GeminiClient:
+def get_openrouter_client() -> OpenRouterClient:
+    """Get or create the singleton OpenRouter client instance."""
     global _client_instance
     if _client_instance is None:
-        _client_instance = GeminiClient()
+        _client_instance = OpenRouterClient()
     return _client_instance
 
 
@@ -263,17 +280,22 @@ def generate_response(
     Returns:
         Generated response text
     """
-    client = get_gemini_client()
+    client = get_openrouter_client()
     return client.generate_response(user_message, chat_history, rag_context, dashboard_context)
 
 
 def get_embedding(text: str) -> Optional[List[float]]:
     """Convenience function to get document embedding."""
-    client = get_gemini_client()
+    client = get_openrouter_client()
     return client.get_embedding(text)
 
 
 def get_query_embedding(query: str) -> Optional[List[float]]:
     """Convenience function to get query embedding."""
-    client = get_gemini_client()
+    client = get_openrouter_client()
     return client.get_query_embedding(query)
+
+
+# Backward compatibility aliases (for gradual migration)
+GeminiClient = OpenRouterClient
+get_gemini_client = get_openrouter_client
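A short usage sketch of the renamed module's public surface, assuming OPENROUTER_API_KEY is set; the prompt and embedded text are made up:

# Hypothetical caller; the dashboard itself goes through the Dash callback in app.py.
from utils.openrouter_client import (
    generate_response,
    get_embedding,
    get_openrouter_client,
)

client = get_openrouter_client()  # singleton, reads OPENROUTER_API_KEY on first use
if client.is_available:
    # Arguments: user_message, chat_history, rag_context, dashboard_context
    reply = generate_response("What does ablating an attention head do?", [], None, None)
    print(reply)

    vector = get_embedding("Attention heads mix information between token positions.")
    print(len(vector) if vector else "embedding unavailable")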
utils/rag_utils.py
CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
 from typing import List, Dict, Optional, Tuple
 import numpy as np
 
-from utils.gemini_client import get_embedding, get_query_embedding, get_gemini_client
+from utils.openrouter_client import get_embedding, get_query_embedding, get_openrouter_client
 
 
 # Configuration
@@ -189,9 +189,9 @@ class RAGService:
         if not self._loaded:
             self.load_documents()
 
-        client = get_gemini_client()
+        client = get_openrouter_client()
         if not client.is_available:
-            print("Gemini client not available, skipping embedding generation")
+            print("OpenRouter client not available, skipping embedding generation")
             return 0
 
         embedded_count = 0