cc165 committed on
Commit
9a2e8a5
·
verified ·
1 Parent(s): ec52c24

Upload 16 files

Browse files
Files changed (16) hide show
  1. Dockerfile +38 -0
  2. LICENSE +21 -0
  3. README.md +199 -5
  4. app.py +14 -0
  5. requirements.txt +6 -0
  6. run.py +8 -0
  7. src/__init__.py +0 -0
  8. src/auth.py +598 -0
  9. src/config.py +247 -0
  10. src/gemini_routes.py +186 -0
  11. src/google_api_client.py +340 -0
  12. src/main.py +143 -0
  13. src/models.py +72 -0
  14. src/openai_routes.py +305 -0
  15. src/openai_transformers.py +260 -0
  16. src/utils.py +38 -0
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies (gcc for building wheels; curl is required by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y \
    gcc \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching (code changes don't invalidate the pip layer)
COPY requirements.txt .

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Create non-root user for security
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser

# Expose ports (8888 for compatibility, 7860 for Hugging Face)
EXPOSE 8888 7860

# Set environment variables
ENV PYTHONPATH=/app
ENV HOST=0.0.0.0
ENV PORT=7860

# Health check (use PORT environment variable so it matches whichever port the app binds)
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:${PORT}/health || exit 1

# Run the application using app.py (Hugging Face compatible entry point)
CMD ["python", "app.py"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Gemini CLI to API Proxy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,10 +1,204 @@
1
  ---
2
- title: Geminicli2api
3
- emoji: 🏆
4
- colorFrom: indigo
5
- colorTo: blue
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Gemini CLI to API Proxy
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
+ app_port: 7860
10
  ---
11
 
12
+ # Gemini CLI to API Proxy (geminicli2api)
13
+
14
+ A FastAPI-based proxy server that converts the Gemini CLI tool into both OpenAI-compatible and native Gemini API endpoints. This allows you to leverage Google's free Gemini API quota through familiar OpenAI API interfaces or direct Gemini API calls.
15
+
16
+ ## 🚀 Features
17
+
18
+ - **OpenAI-Compatible API**: Drop-in replacement for OpenAI's chat completions API
19
+ - **Native Gemini API**: Direct proxy to Google's Gemini API
20
+ - **Streaming Support**: Real-time streaming responses for both API formats
21
+ - **Multimodal Support**: Text and image inputs
22
+ - **Authentication**: Multiple auth methods (Bearer, Basic, API key)
23
+ - **Google Search Grounding**: Enable Google Search for grounded responses using `-search` models.
24
+ - **Thinking/Reasoning Control**: Control Gemini's thinking process with `-nothinking` and `-maxthinking` models.
25
+ - **Docker Ready**: Containerized for easy deployment
26
+ - **Hugging Face Spaces**: Ready for deployment on Hugging Face
27
+
28
+ ## 🔧 Environment Variables
29
+
30
+ ### Required
31
+ - `GEMINI_AUTH_PASSWORD`: Authentication password for API access
32
+
33
+ ### Optional Credential Sources (choose one)
34
+ - `GEMINI_CREDENTIALS`: JSON string containing Google OAuth credentials
35
+ - `GOOGLE_APPLICATION_CREDENTIALS`: Path to Google OAuth credentials file
36
+ - `GOOGLE_CLOUD_PROJECT`: Google Cloud project ID
37
+ - `GEMINI_PROJECT_ID`: Alternative project ID variable
38
+
39
+ ### Example Credentials JSON
40
+ ```json
41
+ {
42
+ "client_id": "your-client-id",
43
+ "client_secret": "your-client-secret",
44
+ "token": "your-access-token",
45
+ "refresh_token": "your-refresh-token",
46
+ "scopes": ["https://www.googleapis.com/auth/cloud-platform"],
47
+ "token_uri": "https://oauth2.googleapis.com/token"
48
+ }
49
+ ```
50
+
51
+ ## 📡 API Endpoints
52
+
53
+ ### OpenAI-Compatible Endpoints
54
+ - `POST /v1/chat/completions` - Chat completions (streaming & non-streaming)
55
+ - `GET /v1/models` - List available models
56
+
57
+ ### Native Gemini Endpoints
58
+ - `GET /v1beta/models` - List Gemini models
59
+ - `POST /v1beta/models/{model}:generateContent` - Generate content
60
+ - `POST /v1beta/models/{model}:streamGenerateContent` - Stream content
61
+ - All other Gemini API endpoints are proxied through
62
+
63
+ ### Utility Endpoints
64
+ - `GET /health` - Health check for container orchestration
65
+
66
+ ## 🔐 Authentication
67
+
68
+ The API supports multiple authentication methods:
69
+
70
+ 1. **Bearer Token**: `Authorization: Bearer YOUR_PASSWORD`
71
+ 2. **Basic Auth**: `Authorization: Basic base64(username:YOUR_PASSWORD)`
72
+ 3. **Query Parameter**: `?key=YOUR_PASSWORD`
73
+ 4. **Google Header**: `x-goog-api-key: YOUR_PASSWORD`
74
+
75
+ ## 🐳 Docker Usage
76
+
77
+ ```bash
78
+ # Build the image
79
+ docker build -t geminicli2api .
80
+
81
+ # Run on default port 8888 (compatibility)
82
+ docker run -p 8888:8888 \
83
+ -e GEMINI_AUTH_PASSWORD=your_password \
84
+ -e GEMINI_CREDENTIALS='{"client_id":"...","token":"..."}' \
85
+ -e PORT=8888 \
86
+ geminicli2api
87
+
88
+ # Run on port 7860 (Hugging Face compatible)
89
+ docker run -p 7860:7860 \
90
+ -e GEMINI_AUTH_PASSWORD=your_password \
91
+ -e GEMINI_CREDENTIALS='{"client_id":"...","token":"..."}' \
92
+ -e PORT=7860 \
93
+ geminicli2api
94
+ ```
95
+
96
+ ### Docker Compose
97
+
98
+ ```bash
99
+ # Default setup (port 8888)
100
+ docker-compose up -d
101
+
102
+ # Hugging Face setup (port 7860)
103
+ docker-compose --profile hf up -d geminicli2api-hf
104
+ ```
105
+
106
+ ## 🤗 Hugging Face Spaces
107
+
108
+ This project is configured for Hugging Face Spaces deployment:
109
+
110
+ 1. Fork this repository
111
+ 2. Create a new Space on Hugging Face
112
+ 3. Connect your repository
113
+ 4. Set the required environment variables in Space settings:
114
+ - `GEMINI_AUTH_PASSWORD`
115
+ - `GEMINI_CREDENTIALS` (or other credential source)
116
+
117
+ The Space will automatically build and deploy using the included Dockerfile.
118
+
119
+ ## 📝 OpenAI API Example
120
+
121
+ ```python
122
+ import openai
123
+
124
+ # Configure client to use your proxy
125
+ client = openai.OpenAI(
126
+ base_url="http://localhost:8888/v1", # or 7860 for HF
127
+ api_key="your_password" # Your GEMINI_AUTH_PASSWORD
128
+ )
129
+
130
+ # Use like normal OpenAI API
131
+ response = client.chat.completions.create(
132
+ model="gemini-2.5-pro-maxthinking",
133
+ messages=[
134
+ {"role": "user", "content": "Explain the theory of relativity in simple terms."}
135
+ ],
136
+ stream=True
137
+ )
138
+
139
+ # Separate reasoning from the final answer
140
+ for chunk in response:
141
+ if chunk.choices[0].delta.reasoning_content:
142
+ print(f"Thinking: {chunk.choices[0].delta.reasoning_content}")
143
+ if chunk.choices[0].delta.content:
144
+ print(chunk.choices[0].delta.content, end="")
145
+ ```
146
+
147
+ ## 🔧 Native Gemini API Example
148
+
149
+ ```python
150
+ import requests
151
+
152
+ headers = {
153
+ "Authorization": "Bearer your_password",
154
+ "Content-Type": "application/json"
155
+ }
156
+
157
+ data = {
158
+ "contents": [
159
+ {
160
+ "role": "user",
161
+ "parts": [{"text": "Explain the theory of relativity in simple terms."}]
162
+ }
163
+ ],
164
+ "thinkingConfig": {
165
+ "thinkingBudget": 32768,
166
+ "includeThoughts": True
167
+ }
168
+ }
169
+
170
+ response = requests.post(
171
+ "http://localhost:8888/v1beta/models/gemini-2.5-pro:generateContent", # or 7860 for HF
172
+ headers=headers,
173
+ json=data
174
+ )
175
+
176
+ print(response.json())
177
+ ```
178
+
179
+ ## 🎯 Supported Models
180
+
181
+ ### Base Models
182
+ - `gemini-2.5-pro`
183
+ - `gemini-2.5-flash`
184
+ - `gemini-1.5-pro`
185
+ - `gemini-1.5-flash`
186
+ - `gemini-1.0-pro`
187
+
188
+ ### Model Variants
189
+ The proxy automatically creates variants for `gemini-2.5-pro` and `gemini-2.5-flash` models:
190
+
191
+ - **`-search`**: Appends `-search` to a model name to enable Google Search grounding.
192
+ - Example: `gemini-2.5-pro-search`
193
+ - **`-nothinking`**: Appends `-nothinking` to minimize reasoning steps.
194
+ - Example: `gemini-2.5-flash-nothinking`
195
+ - **`-maxthinking`**: Appends `-maxthinking` to maximize the reasoning budget.
196
+ - Example: `gemini-2.5-pro-maxthinking`
197
+
198
+ ## 📄 License
199
+
200
+ MIT License - see LICENSE file for details.
201
+
202
+ ## 🤝 Contributing
203
+
204
+ Contributions are welcome! Please feel free to submit a Pull Request.
app.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face Spaces entry point.
3
+ This file is required for Hugging Face Spaces deployment.
4
+ """
5
+ from src.main import app
6
+
7
+ # Hugging Face Spaces will automatically run this app
8
+ if __name__ == "__main__":
9
+ import uvicorn
10
+ import os
11
+
12
+ host = os.getenv("HOST", "0.0.0.0")
13
+ port = int(os.getenv("PORT", "7860"))
14
+ uvicorn.run(app, host=host, port=port)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn[standard]
3
+ requests
4
+ python-dotenv
5
+ google-auth-oauthlib
6
+ pydantic
run.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uvicorn
3
+ from src.main import app
4
+
5
+ if __name__ == "__main__":
6
+ host = os.getenv("HOST", "0.0.0.0")
7
+ port = int(os.getenv("PORT", "8888"))
8
+ uvicorn.run(app, host=host, port=port)
src/__init__.py ADDED
File without changes
src/auth.py ADDED
@@ -0,0 +1,598 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import base64
4
+ import time
5
+ import logging
6
+ from datetime import datetime
7
+ from fastapi import Request, HTTPException, Depends
8
+ from fastapi.security import HTTPBasic
9
+ from http.server import BaseHTTPRequestHandler, HTTPServer
10
+ from urllib.parse import urlparse, parse_qs
11
+
12
+ from google.oauth2.credentials import Credentials
13
+ from google_auth_oauthlib.flow import Flow
14
+ from google.auth.transport.requests import Request as GoogleAuthRequest
15
+
16
+ from .utils import get_user_agent, get_client_metadata
17
+ from .config import (
18
+ CLIENT_ID, CLIENT_SECRET, SCOPES, CREDENTIAL_FILE,
19
+ CODE_ASSIST_ENDPOINT, GEMINI_AUTH_PASSWORD
20
+ )
21
+
22
# --- Global State ---
# Cached google.oauth2 Credentials object; populated lazily by get_credentials().
credentials = None
# Google Cloud project id resolved for the authenticated user (set from env credentials).
user_project_id = None
# Set True once onboard_user() has completed; prevents repeating onboarding per process.
onboarding_complete = False
credentials_from_env = False  # Track if credentials came from environment variable

# FastAPI helper for HTTP Basic Authentication parsing.
security = HTTPBasic()
29
+
30
class _OAuthCallbackHandler(BaseHTTPRequestHandler):
    """One-shot HTTP handler that captures the OAuth2 ``code`` query parameter.

    The Google OAuth flow redirects the browser back to localhost; this handler
    stores the authorization code on the class so the caller can retrieve it.
    """

    # Filled in (class-level) when Google redirects back with ?code=...
    auth_code = None

    def do_GET(self):
        """Serve the redirect URL: stash the auth code and show a status page."""
        query_params = parse_qs(urlparse(self.path).query)
        received_code = query_params.get("code", [None])[0]

        if not received_code:
            # No code in the redirect: report failure to the user.
            self.send_response(400)
            self.send_header("Content-type", "text/html")
            self.end_headers()
            self.wfile.write(b"<h1>Authentication failed.</h1><p>Please try again.</p>")
            return

        _OAuthCallbackHandler.auth_code = received_code
        self.send_response(200)
        self.send_header("Content-type", "text/html")
        self.end_headers()
        self.wfile.write(b"<h1>OAuth authentication successful!</h1><p>You can close this window. Please check the proxy server logs to verify that onboarding completed successfully. No need to restart the proxy.</p>")
46
+
47
def authenticate_user(request: Request):
    """Authenticate an incoming request against GEMINI_AUTH_PASSWORD.

    Accepted methods, checked in order:
      1. ``?key=...`` query parameter (Gemini client compatibility)
      2. ``x-goog-api-key`` header (Google SDK format)
      3. ``Authorization: Bearer <password>``
      4. ``Authorization: Basic base64(username:password)``

    Returns a short identifier for the matched method (or the Basic-auth
    username). Raises HTTPException(401) if no method matches.
    """
    import hmac

    def _password_matches(candidate):
        # Constant-time comparison prevents timing attacks against the
        # shared password (plain == short-circuits on the first mismatch).
        if not isinstance(GEMINI_AUTH_PASSWORD, str) or not GEMINI_AUTH_PASSWORD:
            return False
        return hmac.compare_digest(
            candidate.encode("utf-8"), GEMINI_AUTH_PASSWORD.encode("utf-8")
        )

    # Check for API key in query parameters first (for Gemini client compatibility)
    api_key = request.query_params.get("key")
    if api_key and _password_matches(api_key):
        return "api_key_user"

    # Check for API key in x-goog-api-key header (Google SDK format)
    goog_api_key = request.headers.get("x-goog-api-key", "")
    if goog_api_key and _password_matches(goog_api_key):
        return "goog_api_key_user"

    # Check for API key in Authorization header (Bearer token format)
    auth_header = request.headers.get("authorization", "")
    if auth_header.startswith("Bearer "):
        bearer_token = auth_header[7:]
        if _password_matches(bearer_token):
            return "bearer_user"

    # Check for HTTP Basic Authentication (any username; password must match)
    if auth_header.startswith("Basic "):
        try:
            encoded_credentials = auth_header[6:]
            decoded_credentials = base64.b64decode(encoded_credentials).decode('utf-8', "ignore")
            username, password = decoded_credentials.split(':', 1)
            if _password_matches(password):
                return username
        except Exception:
            # Malformed Basic header (bad base64, missing colon): fall through to 401.
            pass

    # If none of the authentication methods work
    raise HTTPException(
        status_code=401,
        detail="Invalid authentication credentials. Use HTTP Basic Auth, Bearer token, 'key' query parameter, or 'x-goog-api-key' header.",
        headers={"WWW-Authenticate": "Basic"},
    )
83
+
84
def save_credentials(creds, project_id=None):
    """Persist OAuth credentials (and optionally a project id) to CREDENTIAL_FILE.

    If the credentials came from the GEMINI_CREDENTIALS environment variable,
    the token material is NOT written to disk; at most the project_id is
    back-filled into an already-existing credential file.
    """
    global credentials_from_env

    # Don't save credentials to file if they came from environment variable,
    # but still save project_id if provided and no file exists or file lacks project_id
    if credentials_from_env:
        if project_id and os.path.exists(CREDENTIAL_FILE):
            try:
                with open(CREDENTIAL_FILE, "r") as f:
                    existing_data = json.load(f)
                # Only update project_id if it's missing from the file
                if "project_id" not in existing_data:
                    existing_data["project_id"] = project_id
                    with open(CREDENTIAL_FILE, "w") as f:
                        json.dump(existing_data, f, indent=2)
                    logging.info(f"Added project_id {project_id} to existing credential file")
            except Exception as e:
                # Best-effort only: a broken credential file must not crash the save path.
                logging.warning(f"Could not update project_id in credential file: {e}")
        return

    # Serialize in the same shape gemini-cli expects to read back.
    creds_data = {
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "token": creds.token,
        "refresh_token": creds.refresh_token,
        "scopes": creds.scopes if creds.scopes else SCOPES,
        "token_uri": "https://oauth2.googleapis.com/token",
    }

    if creds.expiry:
        # Normalize naive expiry datetimes to UTC before serializing.
        if creds.expiry.tzinfo is None:
            from datetime import timezone
            expiry_utc = creds.expiry.replace(tzinfo=timezone.utc)
        else:
            expiry_utc = creds.expiry
        # Keep the existing ISO format for backward compatibility, but ensure it's properly handled during loading
        creds_data["expiry"] = expiry_utc.isoformat()

    # Prefer the explicit project_id argument; otherwise preserve any
    # project_id already stored in the credential file.
    if project_id:
        creds_data["project_id"] = project_id
    elif os.path.exists(CREDENTIAL_FILE):
        try:
            with open(CREDENTIAL_FILE, "r") as f:
                existing_data = json.load(f)
            if "project_id" in existing_data:
                creds_data["project_id"] = existing_data["project_id"]
        except Exception:
            pass

    with open(CREDENTIAL_FILE, "w") as f:
        json.dump(creds_data, f, indent=2)
136
+
137
+
138
def get_credentials(allow_oauth_flow=True):
    """Loads credentials matching gemini-cli OAuth2 flow.

    Resolution order:
      1. Previously cached module-global ``credentials`` (returned as-is if a
         token is present — NOTE(review): this path does not re-check expiry).
      2. GEMINI_CREDENTIALS environment variable (JSON string).
      3. CREDENTIAL_FILE on disk (includes GOOGLE_APPLICATION_CREDENTIALS path
         when that env var is set — see config).
      4. Interactive browser OAuth flow on localhost:8080, only when
         ``allow_oauth_flow`` is True; otherwise returns None.

    Returns a google.oauth2 Credentials object, or None when no source yields
    usable credentials.
    """
    global credentials, credentials_from_env, user_project_id

    # Fast path: reuse credentials already loaded in this process.
    if credentials and credentials.token:
        return credentials

    # Check for credentials in environment variable (JSON string)
    env_creds_json = os.getenv("GEMINI_CREDENTIALS")
    if env_creds_json:
        # First, check if we have a refresh token - if so, we should always be able to load credentials
        try:
            raw_env_creds_data = json.loads(env_creds_json)

            # SAFEGUARD: If refresh_token exists, we should always load credentials successfully
            if "refresh_token" in raw_env_creds_data and raw_env_creds_data["refresh_token"]:
                logging.info("Environment refresh token found - ensuring credentials load successfully")

                try:
                    creds_data = raw_env_creds_data.copy()

                    # Handle different credential formats (OAuth token responses
                    # use "access_token"/"scope"; the Credentials loader wants
                    # "token"/"scopes").
                    if "access_token" in creds_data and "token" not in creds_data:
                        creds_data["token"] = creds_data["access_token"]

                    if "scope" in creds_data and "scopes" not in creds_data:
                        creds_data["scopes"] = creds_data["scope"].split()

                    # Handle problematic expiry formats that cause parsing errors
                    if "expiry" in creds_data:
                        expiry_str = creds_data["expiry"]
                        # If expiry has timezone info that causes parsing issues, try to fix it
                        if isinstance(expiry_str, str) and ("+00:00" in expiry_str or "Z" in expiry_str):
                            try:
                                # Try to parse and reformat the expiry to a format Google Credentials can handle
                                from datetime import datetime
                                if "+00:00" in expiry_str:
                                    # Handle ISO format with timezone offset
                                    parsed_expiry = datetime.fromisoformat(expiry_str)
                                elif expiry_str.endswith("Z"):
                                    # Handle ISO format with Z suffix
                                    parsed_expiry = datetime.fromisoformat(expiry_str.replace('Z', '+00:00'))
                                else:
                                    parsed_expiry = datetime.fromisoformat(expiry_str)

                                # Convert to UTC timestamp format that Google Credentials library expects
                                import time
                                timestamp = parsed_expiry.timestamp()
                                # NOTE(review): datetime.utcfromtimestamp is deprecated since Python 3.12.
                                creds_data["expiry"] = datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%dT%H:%M:%SZ")
                                logging.info(f"Converted environment expiry format from '{expiry_str}' to '{creds_data['expiry']}'")
                            except Exception as expiry_error:
                                logging.warning(f"Could not parse environment expiry format '{expiry_str}': {expiry_error}, removing expiry field")
                                # Remove problematic expiry field - credentials will be treated as expired but still loadable
                                del creds_data["expiry"]

                    credentials = Credentials.from_authorized_user_info(creds_data, SCOPES)
                    credentials_from_env = True  # Mark as environment credentials

                    # Extract project_id from environment credentials if available
                    if "project_id" in raw_env_creds_data:
                        user_project_id = raw_env_creds_data["project_id"]
                        logging.info(f"Extracted project_id from environment credentials: {user_project_id}")

                    # Try to refresh if expired and refresh token exists
                    if credentials.expired and credentials.refresh_token:
                        try:
                            logging.info("Environment credentials expired, attempting refresh...")
                            credentials.refresh(GoogleAuthRequest())
                            logging.info("Environment credentials refreshed successfully")
                        except Exception as refresh_error:
                            logging.warning(f"Failed to refresh environment credentials: {refresh_error}")
                            logging.info("Using existing environment credentials despite refresh failure")
                    elif not credentials.expired:
                        logging.info("Environment credentials are still valid, no refresh needed")
                    elif not credentials.refresh_token:
                        logging.warning("Environment credentials expired but no refresh token available")

                    return credentials

                except Exception as parsing_error:
                    # SAFEGUARD: Even if parsing fails, try to create minimal credentials with refresh token
                    logging.warning(f"Failed to parse environment credentials normally: {parsing_error}")
                    logging.info("Attempting to create minimal environment credentials with refresh token")

                    try:
                        # Keep only the fields strictly required to refresh a token.
                        minimal_creds_data = {
                            "client_id": raw_env_creds_data.get("client_id", CLIENT_ID),
                            "client_secret": raw_env_creds_data.get("client_secret", CLIENT_SECRET),
                            "refresh_token": raw_env_creds_data["refresh_token"],
                            "token_uri": "https://oauth2.googleapis.com/token",
                        }

                        credentials = Credentials.from_authorized_user_info(minimal_creds_data, SCOPES)
                        credentials_from_env = True  # Mark as environment credentials

                        # Extract project_id from environment credentials if available
                        if "project_id" in raw_env_creds_data:
                            user_project_id = raw_env_creds_data["project_id"]
                            logging.info(f"Extracted project_id from minimal environment credentials: {user_project_id}")

                        # Force refresh since we don't have a valid token
                        try:
                            logging.info("Refreshing minimal environment credentials...")
                            credentials.refresh(GoogleAuthRequest())
                            logging.info("Minimal environment credentials refreshed successfully")
                            return credentials
                        except Exception as refresh_error:
                            logging.error(f"Failed to refresh minimal environment credentials: {refresh_error}")
                            # Even if refresh fails, return the credentials - they might still work
                            return credentials

                    except Exception as minimal_error:
                        logging.error(f"Failed to create minimal environment credentials: {minimal_error}")
                        # Fall through to file-based credentials
            else:
                logging.warning("No refresh token found in environment credentials")
                # Fall through to file-based credentials

        except Exception as e:
            logging.error(f"Failed to parse environment credentials JSON: {e}")
            # Fall through to file-based credentials

    # Check for credentials file (CREDENTIAL_FILE now includes GOOGLE_APPLICATION_CREDENTIALS path if set)
    if os.path.exists(CREDENTIAL_FILE):
        # First, check if we have a refresh token - if so, we should always be able to load credentials
        try:
            with open(CREDENTIAL_FILE, "r") as f:
                raw_creds_data = json.load(f)

            # SAFEGUARD: If refresh_token exists, we should always load credentials successfully
            if "refresh_token" in raw_creds_data and raw_creds_data["refresh_token"]:
                logging.info("Refresh token found - ensuring credentials load successfully")

                try:
                    creds_data = raw_creds_data.copy()

                    # Handle different credential formats (same normalization as
                    # the environment-variable branch above).
                    if "access_token" in creds_data and "token" not in creds_data:
                        creds_data["token"] = creds_data["access_token"]

                    if "scope" in creds_data and "scopes" not in creds_data:
                        creds_data["scopes"] = creds_data["scope"].split()

                    # Handle problematic expiry formats that cause parsing errors
                    if "expiry" in creds_data:
                        expiry_str = creds_data["expiry"]
                        # If expiry has timezone info that causes parsing issues, try to fix it
                        if isinstance(expiry_str, str) and ("+00:00" in expiry_str or "Z" in expiry_str):
                            try:
                                # Try to parse and reformat the expiry to a format Google Credentials can handle
                                from datetime import datetime
                                if "+00:00" in expiry_str:
                                    # Handle ISO format with timezone offset
                                    parsed_expiry = datetime.fromisoformat(expiry_str)
                                elif expiry_str.endswith("Z"):
                                    # Handle ISO format with Z suffix
                                    parsed_expiry = datetime.fromisoformat(expiry_str.replace('Z', '+00:00'))
                                else:
                                    parsed_expiry = datetime.fromisoformat(expiry_str)

                                # Convert to UTC timestamp format that Google Credentials library expects
                                import time
                                timestamp = parsed_expiry.timestamp()
                                creds_data["expiry"] = datetime.utcfromtimestamp(timestamp).strftime("%Y-%m-%dT%H:%M:%SZ")
                                logging.info(f"Converted expiry format from '{expiry_str}' to '{creds_data['expiry']}'")
                            except Exception as expiry_error:
                                logging.warning(f"Could not parse expiry format '{expiry_str}': {expiry_error}, removing expiry field")
                                # Remove problematic expiry field - credentials will be treated as expired but still loadable
                                del creds_data["expiry"]

                    credentials = Credentials.from_authorized_user_info(creds_data, SCOPES)
                    # Mark as environment credentials if GOOGLE_APPLICATION_CREDENTIALS was used
                    credentials_from_env = bool(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))

                    # Try to refresh if expired and refresh token exists
                    if credentials.expired and credentials.refresh_token:
                        try:
                            logging.info("File-based credentials expired, attempting refresh...")
                            credentials.refresh(GoogleAuthRequest())
                            logging.info("File-based credentials refreshed successfully")
                            save_credentials(credentials)
                        except Exception as refresh_error:
                            logging.warning(f"Failed to refresh file-based credentials: {refresh_error}")
                            logging.info("Using existing file-based credentials despite refresh failure")
                    elif not credentials.expired:
                        logging.info("File-based credentials are still valid, no refresh needed")
                    elif not credentials.refresh_token:
                        logging.warning("File-based credentials expired but no refresh token available")

                    return credentials

                except Exception as parsing_error:
                    # SAFEGUARD: Even if parsing fails, try to create minimal credentials with refresh token
                    logging.warning(f"Failed to parse credentials normally: {parsing_error}")
                    logging.info("Attempting to create minimal credentials with refresh token")

                    try:
                        minimal_creds_data = {
                            "client_id": raw_creds_data.get("client_id", CLIENT_ID),
                            "client_secret": raw_creds_data.get("client_secret", CLIENT_SECRET),
                            "refresh_token": raw_creds_data["refresh_token"],
                            "token_uri": "https://oauth2.googleapis.com/token",
                        }

                        credentials = Credentials.from_authorized_user_info(minimal_creds_data, SCOPES)
                        credentials_from_env = bool(os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))

                        # Force refresh since we don't have a valid token
                        try:
                            logging.info("Refreshing minimal credentials...")
                            credentials.refresh(GoogleAuthRequest())
                            logging.info("Minimal credentials refreshed successfully")
                            save_credentials(credentials)
                            return credentials
                        except Exception as refresh_error:
                            logging.error(f"Failed to refresh minimal credentials: {refresh_error}")
                            # Even if refresh fails, return the credentials - they might still work
                            return credentials

                    except Exception as minimal_error:
                        logging.error(f"Failed to create minimal credentials: {minimal_error}")
                        # Fall through to new login as last resort
            else:
                logging.warning("No refresh token found in credentials file")
                # Fall through to new login

        except Exception as e:
            logging.error(f"Failed to read credentials file {CREDENTIAL_FILE}: {e}")
            # Fall through to new login only if file is completely unreadable

    # Only start OAuth flow if explicitly allowed
    if not allow_oauth_flow:
        logging.info("OAuth flow not allowed - returning None (credentials will be required on first request)")
        return None

    # Interactive login: build an "installed app" OAuth client config.
    client_config = {
        "installed": {
            "client_id": CLIENT_ID,
            "client_secret": CLIENT_SECRET,
            "auth_uri": "https://accounts.google.com/o/oauth2/auth",
            "token_uri": "https://oauth2.googleapis.com/token",
        }
    }

    flow = Flow.from_client_config(
        client_config,
        scopes=SCOPES,
        redirect_uri="http://localhost:8080"
    )

    flow.oauth2session.scope = SCOPES

    auth_url, _ = flow.authorization_url(
        access_type="offline",
        prompt="consent",
        include_granted_scopes='true'
    )
    print(f"\n{'='*80}")
    print(f"AUTHENTICATION REQUIRED")
    print(f"{'='*80}")
    print(f"Please open this URL in your browser to log in:")
    print(f"{auth_url}")
    print(f"{'='*80}\n")
    logging.info(f"Please open this URL in your browser to log in: {auth_url}")

    # Block on a single localhost request to capture the OAuth redirect.
    server = HTTPServer(("", 8080), _OAuthCallbackHandler)
    server.handle_request()

    auth_code = _OAuthCallbackHandler.auth_code
    if not auth_code:
        return None

    # Temporarily suppress oauthlib's scope-change Warning during token
    # exchange (Google may return a different scope set than requested).
    import oauthlib.oauth2.rfc6749.parameters
    original_validate = oauthlib.oauth2.rfc6749.parameters.validate_token_parameters

    def patched_validate(params):
        try:
            return original_validate(params)
        except Warning:
            pass

    oauthlib.oauth2.rfc6749.parameters.validate_token_parameters = patched_validate

    try:
        flow.fetch_token(code=auth_code)
        credentials = flow.credentials
        credentials_from_env = False  # Mark as file-based credentials
        save_credentials(credentials)
        logging.info("Authentication successful! Credentials saved.")
        return credentials
    except Exception as e:
        logging.error(f"Authentication failed: {e}")
        return None
    finally:
        # Always restore the original oauthlib validator.
        oauthlib.oauth2.rfc6749.parameters.validate_token_parameters = original_validate
433
+
434
def onboard_user(creds, project_id):
    """Ensures the user is onboarded, matching gemini-cli setupUser behavior.

    Calls the Code Assist ``loadCodeAssist`` endpoint to discover the user's
    tier; if no tier is active yet, calls ``onboardUser`` and polls the
    long-running operation until it reports done. Idempotent per process via
    the ``onboarding_complete`` flag. Raises Exception on any failure.
    """
    global onboarding_complete
    if onboarding_complete:
        return

    # Refresh stale credentials up front so the API calls below carry a valid token.
    if creds.expired and creds.refresh_token:
        try:
            creds.refresh(GoogleAuthRequest())
            save_credentials(creds)
        except Exception as e:
            raise Exception(f"Failed to refresh credentials during onboarding: {str(e)}")
    headers = {
        "Authorization": f"Bearer {creds.token}",
        "Content-Type": "application/json",
        "User-Agent": get_user_agent(),
    }

    load_assist_payload = {
        "cloudaicompanionProject": project_id,
        "metadata": get_client_metadata(project_id),
    }

    try:
        import requests
        resp = requests.post(
            f"{CODE_ASSIST_ENDPOINT}/v1internal:loadCodeAssist",
            data=json.dumps(load_assist_payload),
            headers=headers,
        )
        resp.raise_for_status()
        load_data = resp.json()

        # Prefer the active tier; otherwise fall back to the default allowed tier.
        tier = None
        if load_data.get("currentTier"):
            tier = load_data["currentTier"]
        else:
            for allowed_tier in load_data.get("allowedTiers", []):
                if allowed_tier.get("isDefault"):
                    tier = allowed_tier
                    break

        # No tier info at all: synthesize a legacy tier that requires a user project.
        if not tier:
            tier = {
                "name": "",
                "description": "",
                "id": "legacy-tier",
                "userDefinedCloudaicompanionProject": True,
            }

        if tier.get("userDefinedCloudaicompanionProject") and not project_id:
            raise ValueError("This account requires setting the GOOGLE_CLOUD_PROJECT env var.")

        # An active tier means onboarding already happened; nothing more to do.
        if load_data.get("currentTier"):
            onboarding_complete = True
            return

        onboard_req_payload = {
            "tierId": tier.get("id"),
            "cloudaicompanionProject": project_id,
            "metadata": get_client_metadata(project_id),
        }

        # Poll the onboarding long-running operation until it completes.
        # NOTE(review): no retry cap or timeout — this can loop indefinitely
        # if the operation never reports done.
        while True:
            onboard_resp = requests.post(
                f"{CODE_ASSIST_ENDPOINT}/v1internal:onboardUser",
                data=json.dumps(onboard_req_payload),
                headers=headers,
            )
            onboard_resp.raise_for_status()
            lro_data = onboard_resp.json()

            if lro_data.get("done"):
                onboarding_complete = True
                break

            time.sleep(5)

    except requests.exceptions.HTTPError as e:
        raise Exception(f"User onboarding failed. Please check your Google Cloud project permissions and try again. Error: {e.response.text if hasattr(e, 'response') else str(e)}")
    except Exception as e:
        # NOTE(review): this also re-wraps the ValueError raised above, so callers
        # see a generic Exception rather than the original type.
        raise Exception(f"User onboarding failed due to an unexpected error: {str(e)}")
516
+
517
def get_user_project_id(creds):
    """Gets the user's project ID matching gemini-cli setupUser logic.

    Lookup order:
        1. GOOGLE_CLOUD_PROJECT environment variable (always wins, even over
           a previously cached value).
        2. Module-global cache ``user_project_id``.
        3. ``project_id`` stored in the credential file.
        4. Discovery via a loadCodeAssist API probe.

    The winning value is cached in the module global and (for 1 and 4)
    persisted back into the credential file via save_credentials.

    Args:
        creds: OAuth credentials used for the API probe (path 4).

    Returns:
        The resolved project ID string.

    Raises:
        Exception: when no token is available or API discovery fails.
    """
    global user_project_id

    # Priority 1: Check environment variable first (always check, even if user_project_id is set)
    env_project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
    if env_project_id:
        logging.info(f"Using project ID from GOOGLE_CLOUD_PROJECT environment variable: {env_project_id}")
        user_project_id = env_project_id
        # Persist alongside the credentials so later runs can skip discovery.
        save_credentials(creds, user_project_id)
        return user_project_id

    # If we already have a cached project_id and no env var override, use it
    if user_project_id:
        logging.info(f"Using cached project ID: {user_project_id}")
        return user_project_id

    # Priority 2: Check cached project ID in credential file
    if os.path.exists(CREDENTIAL_FILE):
        try:
            with open(CREDENTIAL_FILE, "r") as f:
                creds_data = json.load(f)
            cached_project_id = creds_data.get("project_id")
            if cached_project_id:
                logging.info(f"Using cached project ID from credential file: {cached_project_id}")
                user_project_id = cached_project_id
                return user_project_id
        except Exception as e:
            # A corrupt/unreadable file is not fatal — fall through to discovery.
            logging.warning(f"Could not read project_id from credential file: {e}")

    # Priority 3: Make API call to discover project ID
    # Ensure we have valid credentials for the API call
    if creds.expired and creds.refresh_token:
        try:
            logging.info("Refreshing credentials before project ID discovery...")
            creds.refresh(GoogleAuthRequest())
            save_credentials(creds)
            logging.info("Credentials refreshed successfully for project ID discovery")
        except Exception as e:
            logging.error(f"Failed to refresh credentials while getting project ID: {e}")
            # Continue with existing credentials - they might still work

    if not creds.token:
        raise Exception("No valid access token available for project ID discovery")

    headers = {
        "Authorization": f"Bearer {creds.token}",
        "Content-Type": "application/json",
        "User-Agent": get_user_agent(),
    }

    # Minimal probe: no project set, so the backend answers with the
    # project it associates with this account.
    probe_payload = {
        "metadata": get_client_metadata(),
    }

    try:
        import requests
        logging.info("Attempting to discover project ID via API call...")
        resp = requests.post(
            f"{CODE_ASSIST_ENDPOINT}/v1internal:loadCodeAssist",
            data=json.dumps(probe_payload),
            headers=headers,
        )
        resp.raise_for_status()
        data = resp.json()
        discovered_project_id = data.get("cloudaicompanionProject")
        if not discovered_project_id:
            raise ValueError("Could not find 'cloudaicompanionProject' in loadCodeAssist response.")

        logging.info(f"Discovered project ID via API: {discovered_project_id}")
        user_project_id = discovered_project_id
        # Persist so future runs hit the credential-file cache (priority 2).
        save_credentials(creds, user_project_id)

        return user_project_id
    except requests.exceptions.HTTPError as e:
        logging.error(f"HTTP error during project ID discovery: {e}")
        if hasattr(e, 'response') and e.response:
            logging.error(f"Response status: {e.response.status_code}, body: {e.response.text}")
        raise Exception(f"Failed to discover project ID via API: {e}")
    except Exception as e:
        logging.error(f"Unexpected error during project ID discovery: {e}")
        raise Exception(f"Failed to discover project ID: {e}")
src/config.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration constants for the Geminicli2api proxy server.
3
+ Centralizes all configuration to avoid duplication across modules.
4
+ """
5
+ import os
6
+
7
+ # API Endpoints
8
+ CODE_ASSIST_ENDPOINT = "https://cloudcode-pa.googleapis.com"
9
+
10
+ # Client Configuration
11
+ CLI_VERSION = "0.1.5" # Match current gemini-cli version
12
+
13
+ # OAuth Configuration
14
+ CLIENT_ID = "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com"
15
+ CLIENT_SECRET = "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"
16
+ SCOPES = [
17
+ "https://www.googleapis.com/auth/cloud-platform",
18
+ "https://www.googleapis.com/auth/userinfo.email",
19
+ "https://www.googleapis.com/auth/userinfo.profile",
20
+ ]
21
+
22
+ # File Paths
23
+ SCRIPT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
24
+ CREDENTIAL_FILE = os.path.join(SCRIPT_DIR, os.getenv("GOOGLE_APPLICATION_CREDENTIALS", "oauth_creds.json"))
25
+
26
+ # Authentication
27
+ GEMINI_AUTH_PASSWORD = os.getenv("GEMINI_AUTH_PASSWORD", "123456")
28
+
29
+ # Default Safety Settings for Google API
30
+ DEFAULT_SAFETY_SETTINGS = [
31
+ {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
32
+ {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
33
+ {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
34
+ {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
35
+ {"category": "HARM_CATEGORY_CIVIC_INTEGRITY", "threshold": "BLOCK_NONE"},
36
+ {"category": "HARM_CATEGORY_IMAGE_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
37
+ {"category": "HARM_CATEGORY_IMAGE_HARASSMENT", "threshold": "BLOCK_NONE"},
38
+ {"category": "HARM_CATEGORY_IMAGE_HATE", "threshold": "BLOCK_NONE"},
39
+ {"category": "HARM_CATEGORY_IMAGE_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
40
+ {"category": "HARM_CATEGORY_UNSPECIFIED", "threshold": "BLOCK_NONE"}
41
+ ]
42
+
43
+ # Base Models (without search variants)
44
+ BASE_MODELS = [
45
+ {
46
+ "name": "models/gemini-2.5-pro-preview-05-06",
47
+ "version": "001",
48
+ "displayName": "Gemini 2.5 Pro Preview 05-06",
49
+ "description": "Preview version of Gemini 2.5 Pro from May 6th",
50
+ "inputTokenLimit": 1048576,
51
+ "outputTokenLimit": 65535,
52
+ "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
53
+ "temperature": 1.0,
54
+ "maxTemperature": 2.0,
55
+ "topP": 0.95,
56
+ "topK": 64
57
+ },
58
+ {
59
+ "name": "models/gemini-2.5-pro-preview-06-05",
60
+ "version": "001",
61
+ "displayName": "Gemini 2.5 Pro Preview 06-05",
62
+ "description": "Preview version of Gemini 2.5 Pro from June 5th",
63
+ "inputTokenLimit": 1048576,
64
+ "outputTokenLimit": 65535,
65
+ "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
66
+ "temperature": 1.0,
67
+ "maxTemperature": 2.0,
68
+ "topP": 0.95,
69
+ "topK": 64
70
+ },
71
+ {
72
+ "name": "models/gemini-2.5-pro",
73
+ "version": "001",
74
+ "displayName": "Gemini 2.5 Pro",
75
+ "description": "Advanced multimodal model with enhanced capabilities",
76
+ "inputTokenLimit": 1048576,
77
+ "outputTokenLimit": 65535,
78
+ "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
79
+ "temperature": 1.0,
80
+ "maxTemperature": 2.0,
81
+ "topP": 0.95,
82
+ "topK": 64
83
+ },
84
+ {
85
+ "name": "models/gemini-2.5-flash-preview-05-20",
86
+ "version": "001",
87
+ "displayName": "Gemini 2.5 Flash Preview 05-20",
88
+ "description": "Preview version of Gemini 2.5 Flash from May 20th",
89
+ "inputTokenLimit": 1048576,
90
+ "outputTokenLimit": 65535,
91
+ "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
92
+ "temperature": 1.0,
93
+ "maxTemperature": 2.0,
94
+ "topP": 0.95,
95
+ "topK": 64
96
+ },
97
+ {
98
+ "name": "models/gemini-2.5-flash-preview-04-17",
99
+ "version": "001",
100
+ "displayName": "Gemini 2.5 Flash Preview 04-17",
101
+ "description": "Preview version of Gemini 2.5 Flash from April 17th",
102
+ "inputTokenLimit": 1048576,
103
+ "outputTokenLimit": 65535,
104
+ "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
105
+ "temperature": 1.0,
106
+ "maxTemperature": 2.0,
107
+ "topP": 0.95,
108
+ "topK": 64
109
+ },
110
+ {
111
+ "name": "models/gemini-2.5-flash",
112
+ "version": "001",
113
+ "displayName": "Gemini 2.5 Flash",
114
+ "description": "Fast and efficient multimodal model with latest improvements",
115
+ "inputTokenLimit": 1048576,
116
+ "outputTokenLimit": 65535,
117
+ "supportedGenerationMethods": ["generateContent", "streamGenerateContent"],
118
+ "temperature": 1.0,
119
+ "maxTemperature": 2.0,
120
+ "topP": 0.95,
121
+ "topK": 64
122
+ }
123
+ ]
124
+
125
+ # Generate search variants for applicable models
126
+ def _generate_search_variants():
127
+ """Generate search variants for models that support content generation."""
128
+ search_models = []
129
+ for model in BASE_MODELS:
130
+ # Only add search variants for models that support content generation
131
+ if "generateContent" in model["supportedGenerationMethods"]:
132
+ search_variant = model.copy()
133
+ search_variant["name"] = model["name"] + "-search"
134
+ search_variant["displayName"] = model["displayName"] + " with Google Search"
135
+ search_variant["description"] = model["description"] + " (includes Google Search grounding)"
136
+ search_models.append(search_variant)
137
+ return search_models
138
+
139
+ # Generate thinking variants for applicable models
140
+ def _generate_thinking_variants():
141
+ """Generate nothinking and maxthinking variants for models that support thinking."""
142
+ thinking_models = []
143
+ for model in BASE_MODELS:
144
+ # Only add thinking variants for models that support content generation
145
+ # and contain "gemini-2.5-flash" or "gemini-2.5-pro" in their name
146
+ if ("generateContent" in model["supportedGenerationMethods"] and
147
+ ("gemini-2.5-flash" in model["name"] or "gemini-2.5-pro" in model["name"])):
148
+
149
+ # Add -nothinking variant
150
+ nothinking_variant = model.copy()
151
+ nothinking_variant["name"] = model["name"] + "-nothinking"
152
+ nothinking_variant["displayName"] = model["displayName"] + " (No Thinking)"
153
+ nothinking_variant["description"] = model["description"] + " (thinking disabled)"
154
+ thinking_models.append(nothinking_variant)
155
+
156
+ # Add -maxthinking variant
157
+ maxthinking_variant = model.copy()
158
+ maxthinking_variant["name"] = model["name"] + "-maxthinking"
159
+ maxthinking_variant["displayName"] = model["displayName"] + " (Max Thinking)"
160
+ maxthinking_variant["description"] = model["description"] + " (maximum thinking budget)"
161
+ thinking_models.append(maxthinking_variant)
162
+ return thinking_models
163
+
164
+ # Generate combined variants (search + thinking combinations)
165
+ def _generate_combined_variants():
166
+ """Generate combined search and thinking variants."""
167
+ combined_models = []
168
+ for model in BASE_MODELS:
169
+ # Only add combined variants for models that support content generation
170
+ # and contain "gemini-2.5-flash" or "gemini-2.5-pro" in their name
171
+ if ("generateContent" in model["supportedGenerationMethods"] and
172
+ ("gemini-2.5-flash" in model["name"] or "gemini-2.5-pro" in model["name"])):
173
+
174
+ # search + nothinking
175
+ search_nothinking = model.copy()
176
+ search_nothinking["name"] = model["name"] + "-search-nothinking"
177
+ search_nothinking["displayName"] = model["displayName"] + " with Google Search (No Thinking)"
178
+ search_nothinking["description"] = model["description"] + " (includes Google Search grounding, thinking disabled)"
179
+ combined_models.append(search_nothinking)
180
+
181
+ # search + maxthinking
182
+ search_maxthinking = model.copy()
183
+ search_maxthinking["name"] = model["name"] + "-search-maxthinking"
184
+ search_maxthinking["displayName"] = model["displayName"] + " with Google Search (Max Thinking)"
185
+ search_maxthinking["description"] = model["description"] + " (includes Google Search grounding, maximum thinking budget)"
186
+ combined_models.append(search_maxthinking)
187
+ return combined_models
188
+
189
+ # Supported Models (includes base models, search variants, and thinking variants)
190
+ # Combine all models and then sort them by name to group variants together
191
+ all_models = BASE_MODELS + _generate_search_variants() + _generate_thinking_variants()
192
+ SUPPORTED_MODELS = sorted(all_models, key=lambda x: x['name'])
193
+
194
+ # Helper function to get base model name from any variant
195
def get_base_model_name(model_name):
    """Convert a variant model name to its base model name.

    Fix: the original returned after stripping a single suffix, so combined
    variants such as "...-search-nothinking" (produced by
    _generate_combined_variants) resolved to "...-search" instead of the base
    model. Suffixes are now stripped repeatedly until none remain.

    Args:
        model_name: Model name, possibly carrying "-search", "-nothinking"
            and/or "-maxthinking" suffixes.

    Returns:
        str: the model name with all variant suffixes removed.
    """
    suffixes = ("-maxthinking", "-nothinking", "-search")
    stripped = True
    while stripped:
        stripped = False
        for suffix in suffixes:
            if model_name.endswith(suffix):
                model_name = model_name[:-len(suffix)]
                stripped = True
    return model_name
203
+
204
+ # Helper function to check if model uses search grounding
205
def is_search_model(model_name):
    """Return True when the model name requests Google Search grounding.

    Any name containing the "-search" marker (including combined variants
    such as "-search-nothinking") counts.
    """
    return model_name.find("-search") != -1
208
+
209
+ # Helper function to check if model uses no thinking
210
def is_nothinking_model(model_name):
    """Return True when the model name requests thinking to be disabled.

    Any name containing the "-nothinking" marker counts, including combined
    variants such as "-search-nothinking".
    """
    return model_name.find("-nothinking") != -1
213
+
214
+ # Helper function to check if model uses max thinking
215
def is_maxthinking_model(model_name):
    """Return True when the model name requests the maximum thinking budget.

    Any name containing the "-maxthinking" marker counts, including combined
    variants such as "-search-maxthinking".
    """
    return model_name.find("-maxthinking") != -1
218
+
219
+ # Helper function to get thinking budget for a model
220
def get_thinking_budget(model_name):
    """Get the appropriate thinking budget for a model based on its name and variant.

    Returns:
        int: token budget for the thinking phase:
            * -nothinking variants: 0 for flash, 128 for pro (pro appears to
              require a small minimum budget — see should_include_thoughts).
            * -maxthinking variants: 24576 for flash, 32768 for pro.
            * everything else: -1, which lets the API pick its default.

    Fix: the original fell off the end and implicitly returned None for
    nothinking/maxthinking variants whose base model is neither flash nor
    pro; explicit fallbacks are returned instead.
    """
    base_model = get_base_model_name(model_name)

    if is_nothinking_model(model_name):
        if "gemini-2.5-flash" in base_model:
            return 0  # No thinking for flash
        if "gemini-2.5-pro" in base_model:
            return 128  # Limited thinking for pro
        return 0  # Unknown base with -nothinking: honor the intent, disable thinking

    if is_maxthinking_model(model_name):
        if "gemini-2.5-flash" in base_model:
            return 24576
        if "gemini-2.5-pro" in base_model:
            return 32768
        return -1  # Unknown base with -maxthinking: defer to the API default

    # Default thinking budget for regular models
    return -1  # Default for all models
237
+
238
+ # Helper function to check if thinking should be included in output
239
def should_include_thoughts(model_name):
    """Check if thoughts should be included in the response.

    Every variant requests thought summaries except -nothinking — and even
    then pro models keep them, since pro retains a small thinking budget in
    nothinking mode (see get_thinking_budget).
    """
    if not is_nothinking_model(model_name):
        # All non-nothinking modes include thoughts.
        return True
    # nothinking mode: only pro models still surface thoughts.
    return "gemini-2.5-pro" in get_base_model_name(model_name)
src/gemini_routes.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gemini API Routes - Handles native Gemini API endpoints.
3
+ This module provides native Gemini API endpoints that proxy directly to Google's API
4
+ without any format transformations.
5
+ """
6
+ import json
7
+ import logging
8
+ from fastapi import APIRouter, Request, Response, Depends
9
+
10
+ from .auth import authenticate_user
11
+ from .google_api_client import send_gemini_request, build_gemini_payload_from_native
12
+ from .config import SUPPORTED_MODELS
13
+
14
+ router = APIRouter()
15
+
16
+
17
@router.get("/v1beta/models")
async def gemini_list_models(request: Request, username: str = Depends(authenticate_user)):
    """
    Native Gemini models endpoint.

    Returns the proxy's supported model catalog in the same JSON shape as the
    official Gemini API ({"models": [...]}). Authentication is enforced by the
    dependency; failures are reported as a Gemini-style JSON error with
    status 500.
    """
    try:
        logging.info("Gemini models list requested")
        catalog = {"models": SUPPORTED_MODELS}
        body = json.dumps(catalog)
        logging.info(f"Returning {len(SUPPORTED_MODELS)} Gemini models")
        return Response(
            content=body,
            status_code=200,
            media_type="application/json; charset=utf-8",
        )
    except Exception as e:
        logging.error(f"Failed to list Gemini models: {str(e)}")
        error_body = {
            "error": {
                "message": f"Failed to list models: {str(e)}",
                "code": 500,
            }
        }
        return Response(
            content=json.dumps(error_body),
            status_code=500,
            media_type="application/json",
        )
49
+
50
+
51
@router.api_route("/{full_path:path}", methods=["GET", "POST", "PUT", "DELETE", "PATCH"])
async def gemini_proxy(request: Request, full_path: str, username: str = Depends(authenticate_user)):
    """
    Native Gemini API proxy endpoint.
    Handles all native Gemini API calls by proxying them directly to Google's API.

    This endpoint handles paths like:
    - /v1beta/models/{model}/generateContent
    - /v1beta/models/{model}/streamGenerateContent
    - /v1/models/{model}/generateContent
    - etc.

    Returns a Gemini-style JSON error (400) when the model cannot be derived
    from the path or the body is not valid JSON, a 500 error for unexpected
    failures, and otherwise whatever send_gemini_request produced (plain or
    streaming response).
    """

    try:
        # Get the request body
        post_data = await request.body()

        # Determine if this is a streaming request.
        # NOTE(review): substring match — any path containing "stream" is
        # treated as streaming, not only ":streamGenerateContent"; confirm
        # that is intended.
        is_streaming = "stream" in full_path.lower()

        # Extract model name from the path
        # Paths typically look like: v1beta/models/gemini-1.5-pro/generateContent
        model_name = _extract_model_from_path(full_path)

        logging.info(f"Gemini proxy request: path={full_path}, model={model_name}, stream={is_streaming}")

        if not model_name:
            logging.error(f"Could not extract model name from path: {full_path}")
            return Response(
                content=json.dumps({
                    "error": {
                        "message": f"Could not extract model name from path: {full_path}",
                        "code": 400
                    }
                }),
                status_code=400,
                media_type="application/json"
            )

        # Parse the incoming request (an empty body becomes an empty dict).
        try:
            if post_data:
                incoming_request = json.loads(post_data)
            else:
                incoming_request = {}
        except json.JSONDecodeError as e:
            logging.error(f"Invalid JSON in request body: {str(e)}")
            return Response(
                content=json.dumps({
                    "error": {
                        "message": "Invalid JSON in request body",
                        "code": 400
                    }
                }),
                status_code=400,
                media_type="application/json"
            )

        # Build the payload for Google API
        gemini_payload = build_gemini_payload_from_native(incoming_request, model_name)

        # Send the request to Google API
        response = send_gemini_request(gemini_payload, is_streaming=is_streaming)

        # Log the response status (StreamingResponse may not expose one).
        if hasattr(response, 'status_code'):
            if response.status_code != 200:
                logging.error(f"Gemini API returned error: status={response.status_code}")
            else:
                logging.info(f"Successfully processed Gemini request for model: {model_name}")

        return response

    except Exception as e:
        # Catch-all so the client always receives a well-formed JSON error.
        logging.error(f"Gemini proxy error: {str(e)}")
        return Response(
            content=json.dumps({
                "error": {
                    "message": f"Proxy error: {str(e)}",
                    "code": 500
                }
            }),
            status_code=500,
            media_type="application/json"
        )
136
+
137
+
138
+ def _extract_model_from_path(path: str) -> str:
139
+ """
140
+ Extract the model name from a Gemini API path.
141
+
142
+ Examples:
143
+ - "v1beta/models/gemini-1.5-pro/generateContent" -> "gemini-1.5-pro"
144
+ - "v1/models/gemini-2.0-flash/streamGenerateContent" -> "gemini-2.0-flash"
145
+
146
+ Args:
147
+ path: The API path
148
+
149
+ Returns:
150
+ Model name (just the model name, not prefixed with "models/") or None if not found
151
+ """
152
+ parts = path.split('/')
153
+
154
+ # Look for the pattern: .../models/{model_name}/...
155
+ try:
156
+ models_index = parts.index('models')
157
+ if models_index + 1 < len(parts):
158
+ model_name = parts[models_index + 1]
159
+ # Remove any action suffix like ":streamGenerateContent" or ":generateContent"
160
+ if ':' in model_name:
161
+ model_name = model_name.split(':')[0]
162
+ # Return just the model name without "models/" prefix
163
+ return model_name
164
+ except ValueError:
165
+ pass
166
+
167
+ # If we can't find the pattern, return None
168
+ return None
169
+
170
+
171
@router.get("/v1/models")
async def gemini_list_models_v1(request: Request, username: str = Depends(authenticate_user)):
    """
    Alternative models endpoint for v1 API version.
    Some clients might use /v1/models instead of /v1beta/models.

    Delegates to the /v1beta handler so both versions return the same catalog.
    NOTE(review): the catch-all "/{full_path:path}" route is registered
    earlier in this module, and FastAPI matches routes in registration
    order — verify GET /v1/models actually reaches this handler rather than
    the proxy.
    """
    return await gemini_list_models(request, username)
178
+
179
+
180
+ # Health check endpoint
181
@router.get("/health")
async def health_check():
    """
    Simple health check endpoint.

    Reports liveness of the proxy process; requires no authentication.
    """
    status_payload = {"status": "healthy", "service": "geminicli2api"}
    return status_payload
src/google_api_client.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Google API Client - Handles all communication with Google's Gemini API.
3
+ This module is used by both OpenAI compatibility layer and native Gemini endpoints.
4
+ """
5
+ import json
6
+ import logging
7
+ import requests
8
+ from fastapi import Response
9
+ from fastapi.responses import StreamingResponse
10
+ from google.auth.transport.requests import Request as GoogleAuthRequest
11
+
12
+ from .auth import get_credentials, save_credentials, get_user_project_id, onboard_user
13
+ from .utils import get_user_agent
14
+ from .config import (
15
+ CODE_ASSIST_ENDPOINT,
16
+ DEFAULT_SAFETY_SETTINGS,
17
+ get_base_model_name,
18
+ is_search_model,
19
+ get_thinking_budget,
20
+ should_include_thoughts
21
+ )
22
+ import asyncio
23
+
24
+
25
def send_gemini_request(payload: dict, is_streaming: bool = False) -> Response:
    """
    Send a request to Google's Gemini API.

    Sequencing: load credentials -> refresh token if expired -> resolve the
    user's project ID -> ensure the user is onboarded -> POST the wrapped
    payload ({"model", "project", "request"}) to the Code Assist endpoint.

    Args:
        payload: The request payload in Gemini format
        is_streaming: Whether this is a streaming request

    Returns:
        FastAPI Response object (StreamingResponse for streaming requests);
        credential/transport failures come back as plain-text or JSON 500s.
    """
    # Get and validate credentials
    creds = get_credentials()
    if not creds:
        return Response(
            content="Authentication failed. Please restart the proxy to log in.",
            status_code=500
        )


    # Refresh credentials if needed
    if creds.expired and creds.refresh_token:
        try:
            creds.refresh(GoogleAuthRequest())
            save_credentials(creds)
        except Exception as e:
            return Response(
                content="Token refresh failed. Please restart the proxy to re-authenticate.",
                status_code=500
            )
    elif not creds.token:
        return Response(
            content="No access token. Please restart the proxy to re-authenticate.",
            status_code=500
        )

    # Get project ID and onboard user (onboarding is memoized per process).
    proj_id = get_user_project_id(creds)
    if not proj_id:
        return Response(content="Failed to get user project ID.", status_code=500)

    onboard_user(creds, proj_id)

    # Build the final payload with project info — the Code Assist API wraps
    # the standard Gemini request in this envelope.
    final_payload = {
        "model": payload.get("model"),
        "project": proj_id,
        "request": payload.get("request", {})
    }

    # Determine the action and URL (?alt=sse selects server-sent events).
    action = "streamGenerateContent" if is_streaming else "generateContent"
    target_url = f"{CODE_ASSIST_ENDPOINT}/v1internal:{action}"
    if is_streaming:
        target_url += "?alt=sse"

    # Build request headers
    request_headers = {
        "Authorization": f"Bearer {creds.token}",
        "Content-Type": "application/json",
        "User-Agent": get_user_agent(),
    }

    final_post_data = json.dumps(final_payload)

    # Send the request.
    # NOTE(review): no timeout is passed to requests.post — a hung upstream
    # blocks this worker indefinitely; confirm whether that is acceptable.
    try:
        if is_streaming:
            resp = requests.post(target_url, data=final_post_data, headers=request_headers, stream=True)
            return _handle_streaming_response(resp)
        else:
            resp = requests.post(target_url, data=final_post_data, headers=request_headers)
            return _handle_non_streaming_response(resp)
    except requests.exceptions.RequestException as e:
        logging.error(f"Request to Google API failed: {str(e)}")
        return Response(
            content=json.dumps({"error": {"message": f"Request failed: {str(e)}"}}),
            status_code=500,
            media_type="application/json"
        )
    except Exception as e:
        logging.error(f"Unexpected error during Google API request: {str(e)}")
        return Response(
            content=json.dumps({"error": {"message": f"Unexpected error: {str(e)}"}}),
            status_code=500,
            media_type="application/json"
        )
112
+
113
+
114
def _handle_streaming_response(resp) -> StreamingResponse:
    """Handle streaming response from Google API.

    On upstream HTTP errors, emits a single SSE "data:" event carrying an
    error object (preserving the upstream status code). On success, re-emits
    each upstream SSE line: the Code Assist wrapper's {"response": ...}
    envelope is unwrapped so clients see standard Gemini chunks.
    """

    # Check for HTTP errors before starting to stream
    if resp.status_code != 200:
        logging.error(f"Google API returned status {resp.status_code}: {resp.text}")
        error_message = f"Google API error: {resp.status_code}"
        try:
            error_data = resp.json()
            if "error" in error_data:
                error_message = error_data["error"].get("message", error_message)
        except:
            # Non-JSON error body — keep the generic message.
            pass

        # Return error as a streaming response so SSE clients can parse it.
        async def error_generator():
            error_response = {
                "error": {
                    "message": error_message,
                    "type": "invalid_request_error" if resp.status_code == 404 else "api_error",
                    "code": resp.status_code
                }
            }
            yield f'data: {json.dumps(error_response)}\n\n'.encode('utf-8')

        response_headers = {
            "Content-Type": "text/event-stream",
            "Content-Disposition": "attachment",
            "Vary": "Origin, X-Origin, Referer",
            "X-XSS-Protection": "0",
            "X-Frame-Options": "SAMEORIGIN",
            "X-Content-Type-Options": "nosniff",
            "Server": "ESF"
        }

        return StreamingResponse(
            error_generator(),
            media_type="text/event-stream",
            headers=response_headers,
            status_code=resp.status_code
        )

    async def stream_generator():
        # NOTE(review): resp.iter_lines() is blocking I/O inside an async
        # generator; the asyncio.sleep(0) below only yields control between
        # chunks — under load this can stall the event loop. Confirm whether
        # that is acceptable here.
        try:
            with resp:
                for chunk in resp.iter_lines():
                    if chunk:
                        if not isinstance(chunk, str):
                            chunk = chunk.decode('utf-8', "ignore")

                        # Strip the SSE framing before parsing the JSON payload.
                        if chunk.startswith('data: '):
                            chunk = chunk[len('data: '):]

                        try:
                            obj = json.loads(chunk)

                            if "response" in obj:
                                # Unwrap the Code Assist envelope and re-frame as SSE.
                                response_chunk = obj["response"]
                                response_json = json.dumps(response_chunk, separators=(',', ':'))
                                response_line = f"data: {response_json}\n\n"
                                yield response_line.encode('utf-8', "ignore")
                                await asyncio.sleep(0)
                            else:
                                # Pass through objects with no envelope unchanged.
                                obj_json = json.dumps(obj, separators=(',', ':'))
                                yield f"data: {obj_json}\n\n".encode('utf-8', "ignore")
                        except json.JSONDecodeError:
                            # Skip keep-alives / non-JSON lines.
                            continue

        except requests.exceptions.RequestException as e:
            logging.error(f"Streaming request failed: {str(e)}")
            error_response = {
                "error": {
                    "message": f"Upstream request failed: {str(e)}",
                    "type": "api_error",
                    "code": 502
                }
            }
            yield f'data: {json.dumps(error_response)}\n\n'.encode('utf-8', "ignore")
        except Exception as e:
            logging.error(f"Unexpected error during streaming: {str(e)}")
            error_response = {
                "error": {
                    "message": f"An unexpected error occurred: {str(e)}",
                    "type": "api_error",
                    "code": 500
                }
            }
            yield f'data: {json.dumps(error_response)}\n\n'.encode('utf-8', "ignore")

    response_headers = {
        "Content-Type": "text/event-stream",
        "Content-Disposition": "attachment",
        "Vary": "Origin, X-Origin, Referer",
        "X-XSS-Protection": "0",
        "X-Frame-Options": "SAMEORIGIN",
        "X-Content-Type-Options": "nosniff",
        "Server": "ESF"
    }

    return StreamingResponse(
        stream_generator(),
        media_type="text/event-stream",
        headers=response_headers
    )
218
+
219
+
220
def _handle_non_streaming_response(resp) -> Response:
    """Handle non-streaming response from Google API.

    On 200: strips an optional "data: " SSE prefix, unwraps the Code Assist
    {"response": ...} envelope and returns the inner Gemini response as JSON.
    On error: tries to re-shape the upstream error into a uniform
    {"error": {message, type, code}} body, falling back to forwarding the
    raw upstream response unchanged.
    """
    if resp.status_code == 200:
        try:
            google_api_response = resp.text
            # Some responses arrive SSE-framed even without ?alt=sse.
            if google_api_response.startswith('data: '):
                google_api_response = google_api_response[len('data: '):]
            google_api_response = json.loads(google_api_response)
            # Unwrap the Code Assist envelope; clients expect a plain
            # Gemini response object.
            standard_gemini_response = google_api_response.get("response")
            return Response(
                content=json.dumps(standard_gemini_response),
                status_code=200,
                media_type="application/json; charset=utf-8"
            )
        except (json.JSONDecodeError, AttributeError) as e:
            # Unparseable body — forward the upstream bytes unchanged.
            logging.error(f"Failed to parse Google API response: {str(e)}")
            return Response(
                content=resp.content,
                status_code=resp.status_code,
                media_type=resp.headers.get("Content-Type")
            )
    else:
        # Log the error details
        logging.error(f"Google API returned status {resp.status_code}: {resp.text}")

        # Try to parse error response and provide meaningful error message
        try:
            error_data = resp.json()
            if "error" in error_data:
                error_message = error_data["error"].get("message", f"API error: {resp.status_code}")
                error_response = {
                    "error": {
                        "message": error_message,
                        "type": "invalid_request_error" if resp.status_code == 404 else "api_error",
                        "code": resp.status_code
                    }
                }
                return Response(
                    content=json.dumps(error_response),
                    status_code=resp.status_code,
                    media_type="application/json"
                )
        except (json.JSONDecodeError, KeyError):
            pass

        # Fallback to original response if we can't parse the error
        return Response(
            content=resp.content,
            status_code=resp.status_code,
            media_type=resp.headers.get("Content-Type")
        )
271
+
272
+
273
def build_gemini_payload_from_openai(openai_payload: dict) -> dict:
    """
    Build a Gemini API payload from an OpenAI-transformed request.
    This is used when OpenAI requests are converted to Gemini format.

    Wraps the request fields into the {"model": ..., "request": ...} envelope
    expected upstream. Absent (None) fields are dropped; safetySettings falls
    back to the proxy-wide defaults and generationConfig to an empty dict.
    """
    candidate_fields = {
        "contents": openai_payload.get("contents"),
        "systemInstruction": openai_payload.get("systemInstruction"),
        "cachedContent": openai_payload.get("cachedContent"),
        "tools": openai_payload.get("tools"),
        "toolConfig": openai_payload.get("toolConfig"),
        "safetySettings": openai_payload.get("safetySettings", DEFAULT_SAFETY_SETTINGS),
        "generationConfig": openai_payload.get("generationConfig", {}),
    }

    # Keep only fields that were actually provided (None means "absent").
    request_body = {}
    for field_name, field_value in candidate_fields.items():
        if field_value is not None:
            request_body[field_name] = field_value

    return {
        "model": openai_payload.get("model"),
        "request": request_body,
    }
302
+
303
+
304
def build_gemini_payload_from_native(native_request: dict, model_from_path: str) -> dict:
    """
    Build a Gemini API payload from a native Gemini request.
    This is used for direct Gemini API calls.

    NOTE: mutates ``native_request`` in place (overwrites safetySettings,
    fills in generationConfig.thinkingConfig, may append a tools entry)
    before wrapping it in the {"model", "request"} envelope. The variant
    suffixes on ``model_from_path`` select thinking/search behavior; the API
    itself is called with the base model name.
    """
    # Permissive safety settings always replace whatever the client sent.
    native_request["safetySettings"] = DEFAULT_SAFETY_SETTINGS

    if "generationConfig" not in native_request:
        native_request["generationConfig"] = {}

    # native_request["enableEnhancedCivicAnswers"] = False

    if "thinkingConfig" not in native_request["generationConfig"]:
        native_request["generationConfig"]["thinkingConfig"] = {}

    # Configure thinking based on model variant
    thinking_budget = get_thinking_budget(model_from_path)
    include_thoughts = should_include_thoughts(model_from_path)

    native_request["generationConfig"]["thinkingConfig"]["includeThoughts"] = include_thoughts
    # Keep a client-supplied thinkingBudget only when this variant has no
    # explicit budget of its own (-1 = API default); otherwise overwrite it.
    if "thinkingBudget" in native_request["generationConfig"]["thinkingConfig"] and thinking_budget == -1:
        pass
    else:
        native_request["generationConfig"]["thinkingConfig"]["thinkingBudget"] = thinking_budget

    # Add Google Search grounding for search models
    if is_search_model(model_from_path):
        if "tools" not in native_request:
            native_request["tools"] = []
        # Add googleSearch tool if not already present
        if not any(tool.get("googleSearch") for tool in native_request["tools"]):
            native_request["tools"].append({"googleSearch": {}})

    return {
        "model": get_base_model_name(model_from_path),  # Use base model name for API call
        "request": native_request
    }
src/main.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ from fastapi import FastAPI, Request, Response
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from .gemini_routes import router as gemini_router
6
+ from .openai_routes import router as openai_router
7
+ from .auth import get_credentials, get_user_project_id, onboard_user
8
+
9
# Configure logging BEFORE anything else logs: calling logging.info()/warning()
# on an unconfigured root logger implicitly installs a default handler, which
# would turn this explicit basicConfig() into a silent no-op (the custom
# format/level would never apply). In the original order the dotenv messages
# below triggered exactly that.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

# Load environment variables from .env file (best-effort; the server still
# starts without python-dotenv or without a .env file).
try:
    from dotenv import load_dotenv
    load_dotenv()
    logging.info("Environment variables loaded from .env file")
except ImportError:
    logging.warning("python-dotenv not installed, .env file will not be loaded automatically")
except Exception as e:
    logging.warning(f"Could not load .env file: {e}")
24
+
25
# FastAPI application instance shared by all routers registered below.
app = FastAPI()

# Add CORS middleware for preflight requests.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by the CORS spec (browsers will not send credentials to a wildcard
# origin) — confirm whether credentialed cross-origin calls are actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all origins
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)
35
+
36
@app.on_event("startup")
async def startup_event():
    """Load Google credentials at startup and onboard the user when possible.

    Never raises: every failure path is logged and swallowed so the server
    keeps running and setup problems can be fixed without killing the process.
    Fixes vs. original: drops the redundant function-local ``import os``
    (already imported at module top) and deduplicates the onboarding logic
    that was copy-pasted into both credential branches.
    """

    def _onboard(creds) -> bool:
        """Resolve the user's project ID and onboard; True on success, log-only on failure."""
        try:
            proj_id = get_user_project_id(creds)
            if proj_id:
                onboard_user(creds, proj_id)
                logging.info(f"Successfully onboarded with project ID: {proj_id}")
                logging.info("Gemini proxy server started successfully")
                return True
        except Exception as e:
            logging.error(f"Setup failed: {str(e)}")
            logging.warning("Server started but may not function properly until setup issues are resolved.")
        return False

    try:
        logging.info("Starting Gemini proxy server...")

        # Local import to avoid a circular import at module load time.
        from .config import CREDENTIAL_FILE

        env_creds_json = os.getenv("GEMINI_CREDENTIALS")
        creds_file_exists = os.path.exists(CREDENTIAL_FILE)

        if env_creds_json or creds_file_exists:
            try:
                # Try to load existing credentials without triggering OAuth.
                creds = get_credentials(allow_oauth_flow=False)
                if creds:
                    if _onboard(creds):
                        logging.info("Authentication required - Password: see .env file")
                else:
                    logging.warning("Credentials file exists but could not be loaded. Server started - authentication will be required on first request.")
            except Exception as e:
                logging.error(f"Credential loading error: {str(e)}")
                logging.warning("Server started but credentials need to be set up.")
        else:
            # No credentials found - prompt the user to authenticate interactively.
            logging.info("No credentials found. Starting OAuth authentication flow...")
            try:
                creds = get_credentials(allow_oauth_flow=True)
                if creds:
                    _onboard(creds)
                else:
                    logging.error("Authentication failed. Server started but will not function until credentials are provided.")
            except Exception as e:
                logging.error(f"Authentication error: {str(e)}")
                logging.warning("Server started but authentication failed.")

            logging.info("Authentication required - Password: see .env file")

    except Exception as e:
        logging.error(f"Startup error: {str(e)}")
        logging.warning("Server may not function properly.")
94
+
95
@app.options("/{full_path:path}")
async def handle_preflight(request: Request, full_path: str):
    """Handle CORS preflight requests without authentication."""
    preflight_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, PATCH, OPTIONS",
        "Access-Control-Allow-Headers": "*",
        "Access-Control-Allow-Credentials": "true",
    }
    return Response(status_code=200, headers=preflight_headers)
107
+
108
# Root endpoint - no authentication required
@app.get("/")
async def root():
    """
    Root endpoint providing project information.
    No authentication required.
    """
    endpoint_map = {
        "openai_compatible": {
            "chat_completions": "/v1/chat/completions",
            "models": "/v1/models",
        },
        "native_gemini": {
            "models": "/v1beta/models",
            "generate": "/v1beta/models/{model}/generateContent",
            "stream": "/v1beta/models/{model}/streamGenerateContent",
        },
        "health": "/health",
    }
    return {
        "name": "geminicli2api",
        "description": "OpenAI-compatible API proxy for Google's Gemini models via gemini-cli",
        "purpose": "Provides both OpenAI-compatible endpoints (/v1/chat/completions) and native Gemini API endpoints for accessing Google's Gemini models",
        "version": "1.0.0",
        "endpoints": endpoint_map,
        "authentication": "Required for all endpoints except root and health",
        "repository": "https://github.com/user/geminicli2api",
    }
135
+
136
# Health check endpoint for Docker/Hugging Face
@app.get("/health")
async def health_check():
    """Liveness probe for container orchestration; no authentication."""
    status_payload = {"status": "healthy", "service": "geminicli2api"}
    return status_payload
141
+
142
+ app.include_router(openai_router)
143
+ app.include_router(gemini_router)
src/models.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from typing import List, Optional, Union, Dict, Any
3
+
4
+ # OpenAI Models
5
class OpenAIChatMessage(BaseModel):
    """A single chat message in OpenAI wire format."""
    role: str  # e.g. "system" | "user" | "assistant" (not validated here)
    content: Union[str, List[Dict[str, Any]]]  # plain text, or a list of multimodal parts
    reasoning_content: Optional[str] = None  # extension: model "thinking" text
9
+
10
class OpenAIChatCompletionRequest(BaseModel):
    """Incoming /v1/chat/completions request body (OpenAI-compatible)."""
    model: str
    messages: List[OpenAIChatMessage]
    stream: bool = False  # SSE streaming when true
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, List[str]]] = None  # one stop sequence or a list
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    n: Optional[int] = None  # number of completions requested
    seed: Optional[int] = None
    response_format: Optional[Dict[str, Any]] = None  # e.g. {"type": "json_object"}

    class Config:
        extra = "allow"  # Allow additional fields not explicitly defined
26
+
27
class OpenAIChatCompletionChoice(BaseModel):
    """One completion choice within a non-streaming response."""
    index: int
    message: OpenAIChatMessage
    finish_reason: Optional[str] = None  # "stop" | "length" | "content_filter" | None
31
+
32
class OpenAIChatCompletionResponse(BaseModel):
    """Non-streaming /v1/chat/completions response body."""
    id: str
    object: str  # "chat.completion"
    created: int  # unix timestamp, seconds
    model: str
    choices: List[OpenAIChatCompletionChoice]
38
+
39
class OpenAIDelta(BaseModel):
    """Incremental message fragment within a streaming chunk."""
    content: Optional[str] = None
    reasoning_content: Optional[str] = None  # extension: incremental "thinking" text
42
+
43
class OpenAIChatCompletionStreamChoice(BaseModel):
    """One choice within a streaming chunk."""
    index: int
    delta: OpenAIDelta
    finish_reason: Optional[str] = None  # set only on the terminal chunk
47
+
48
class OpenAIChatCompletionStreamResponse(BaseModel):
    """One SSE chunk of a streaming chat completion."""
    id: str  # same id is repeated across all chunks of one completion
    object: str  # "chat.completion.chunk"
    created: int  # unix timestamp, seconds
    model: str
    choices: List[OpenAIChatCompletionStreamChoice]
54
+
55
+ # Gemini Models
56
class GeminiPart(BaseModel):
    """One text part of a Gemini content block."""
    text: str
58
+
59
class GeminiContent(BaseModel):
    """A role-attributed list of parts (Gemini uses "user" / "model" roles)."""
    role: str
    parts: List[GeminiPart]
62
+
63
class GeminiRequest(BaseModel):
    """Minimal native Gemini generateContent request body."""
    contents: List[GeminiContent]
65
+
66
class GeminiCandidate(BaseModel):
    """One generated candidate within a Gemini response."""
    content: GeminiContent
    # NOTE(review): Gemini's wire format uses camelCase "finishReason" — confirm
    # this snake_case field is populated (no alias is declared here).
    finish_reason: Optional[str] = None
    index: int
70
+
71
class GeminiResponse(BaseModel):
    """Top-level Gemini generateContent response."""
    candidates: List[GeminiCandidate]
src/openai_routes.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenAI API Routes - Handles OpenAI-compatible endpoints.
3
+ This module provides OpenAI-compatible endpoints that transform requests/responses
4
+ and delegate to the Google API client.
5
+ """
6
+ import json
7
+ import uuid
8
+ import asyncio
9
+ import logging
10
+ from fastapi import APIRouter, Request, Response, Depends
11
+ from fastapi.responses import StreamingResponse
12
+
13
+ from .auth import authenticate_user
14
+ from .models import OpenAIChatCompletionRequest
15
+ from .openai_transformers import (
16
+ openai_request_to_gemini,
17
+ gemini_response_to_openai,
18
+ gemini_stream_chunk_to_openai
19
+ )
20
+ from .google_api_client import send_gemini_request, build_gemini_payload_from_openai
21
+
22
router = APIRouter()  # mounted onto the FastAPI app by src.main
23
+
24
+
25
@router.post("/v1/chat/completions")
async def openai_chat_completions(
    request: OpenAIChatCompletionRequest,
    http_request: Request,  # unused here; kept so middleware/auth can see the raw request
    username: str = Depends(authenticate_user)
):
    """
    OpenAI-compatible chat completions endpoint.
    Transforms OpenAI requests to Gemini format, sends to Google API,
    and transforms responses back to OpenAI format.

    Returns a StreamingResponse emitting OpenAI-style SSE chunks when
    ``request.stream`` is true, otherwise a plain OpenAI chat-completion
    dict. All errors are reported in OpenAI's {"error": {...}} shape.
    """

    try:
        logging.info(f"OpenAI chat completion request: model={request.model}, stream={request.stream}")

        # Transform OpenAI request to Gemini format
        gemini_request_data = openai_request_to_gemini(request)

        # Build the payload for Google API
        gemini_payload = build_gemini_payload_from_openai(gemini_request_data)

    except Exception as e:
        # Any failure up to this point means the client's request could not be
        # translated, so it is reported as a 400 invalid_request_error.
        logging.error(f"Error processing OpenAI request: {str(e)}")
        return Response(
            content=json.dumps({
                "error": {
                    "message": f"Request processing failed: {str(e)}",
                    "type": "invalid_request_error",
                    "code": 400
                }
            }),
            status_code=400,
            media_type="application/json"
        )

    if request.stream:
        # Handle streaming response
        async def openai_stream_generator():
            try:
                response = send_gemini_request(gemini_payload, is_streaming=True)

                if isinstance(response, StreamingResponse):
                    # One id is shared by every chunk of this completion,
                    # as the OpenAI streaming protocol requires.
                    response_id = "chatcmpl-" + str(uuid.uuid4())
                    logging.info(f"Starting streaming response: {response_id}")

                    async for chunk in response.body_iterator:
                        if isinstance(chunk, bytes):
                            chunk = chunk.decode('utf-8', "ignore")

                        # NOTE(review): assumes each iterated chunk is exactly one
                        # SSE event beginning with "data: " — confirm the upstream
                        # iterator never splits or batches events across chunks.
                        if chunk.startswith('data: '):
                            try:
                                # Parse the Gemini streaming chunk
                                chunk_data = chunk[6:]  # Remove 'data: ' prefix
                                gemini_chunk = json.loads(chunk_data)

                                # Check if this is an error chunk
                                if "error" in gemini_chunk:
                                    logging.error(f"Error in streaming response: {gemini_chunk['error']}")
                                    # Transform error to OpenAI format
                                    error_data = {
                                        "error": {
                                            "message": gemini_chunk["error"].get("message", "Unknown error"),
                                            "type": gemini_chunk["error"].get("type", "api_error"),
                                            "code": gemini_chunk["error"].get("code")
                                        }
                                    }
                                    yield f"data: {json.dumps(error_data)}\n\n"
                                    yield "data: [DONE]\n\n"
                                    return

                                # Transform to OpenAI format
                                openai_chunk = gemini_stream_chunk_to_openai(
                                    gemini_chunk,
                                    request.model,
                                    response_id
                                )

                                # Send as OpenAI streaming format
                                yield f"data: {json.dumps(openai_chunk)}\n\n"
                                # Yield control to the event loop between chunks.
                                await asyncio.sleep(0)

                            except (json.JSONDecodeError, KeyError, UnicodeDecodeError) as e:
                                # Malformed chunks are skipped, not fatal.
                                logging.warning(f"Failed to parse streaming chunk: {str(e)}")
                                continue

                    # Send the final [DONE] marker
                    yield "data: [DONE]\n\n"
                    logging.info(f"Completed streaming response: {response_id}")
                else:
                    # Error case - handle Response object with error
                    error_msg = "Streaming request failed"
                    status_code = 500

                    if hasattr(response, 'status_code'):
                        status_code = response.status_code
                        error_msg += f" (status: {status_code})"

                    if hasattr(response, 'body'):
                        try:
                            # Try to parse error response for a better message
                            error_body = response.body
                            if isinstance(error_body, bytes):
                                error_body = error_body.decode('utf-8', "ignore")
                            error_data = json.loads(error_body)
                            if "error" in error_data:
                                error_msg = error_data["error"].get("message", error_msg)
                        except:
                            # Best-effort only; fall back to the generic message.
                            pass

                    logging.error(f"Streaming request failed: {error_msg}")
                    error_data = {
                        "error": {
                            "message": error_msg,
                            "type": "invalid_request_error" if status_code == 404 else "api_error",
                            "code": status_code
                        }
                    }
                    yield f"data: {json.dumps(error_data)}\n\n"
                    yield "data: [DONE]\n\n"
            except Exception as e:
                # Last-resort guard: stream an error event instead of dying silently.
                logging.error(f"Streaming error: {str(e)}")
                error_data = {
                    "error": {
                        "message": f"Streaming failed: {str(e)}",
                        "type": "api_error",
                        "code": 500
                    }
                }
                yield f"data: {json.dumps(error_data)}\n\n"
                yield "data: [DONE]\n\n"

        return StreamingResponse(
            openai_stream_generator(),
            media_type="text/event-stream"
        )

    else:
        # Handle non-streaming response
        try:
            response = send_gemini_request(gemini_payload, is_streaming=False)

            if isinstance(response, Response) and response.status_code != 200:
                # Handle error responses from Google API
                logging.error(f"Gemini API error: status={response.status_code}")

                try:
                    # Try to parse the error response and transform to OpenAI format
                    error_body = response.body
                    if isinstance(error_body, bytes):
                        error_body = error_body.decode('utf-8', "ignore")

                    error_data = json.loads(error_body)
                    if "error" in error_data:
                        # Transform Google API error to OpenAI format
                        openai_error = {
                            "error": {
                                "message": error_data["error"].get("message", f"API error: {response.status_code}"),
                                "type": error_data["error"].get("type", "invalid_request_error" if response.status_code == 404 else "api_error"),
                                "code": error_data["error"].get("code", response.status_code)
                            }
                        }
                        return Response(
                            content=json.dumps(openai_error),
                            status_code=response.status_code,
                            media_type="application/json"
                        )
                except (json.JSONDecodeError, UnicodeDecodeError):
                    pass

                # Fallback error response
                return Response(
                    content=json.dumps({
                        "error": {
                            "message": f"API error: {response.status_code}",
                            "type": "invalid_request_error" if response.status_code == 404 else "api_error",
                            "code": response.status_code
                        }
                    }),
                    status_code=response.status_code,
                    media_type="application/json"
                )

            try:
                # Parse Gemini response and transform to OpenAI format
                gemini_response = json.loads(response.body)
                openai_response = gemini_response_to_openai(gemini_response, request.model)

                logging.info(f"Successfully processed non-streaming response for model: {request.model}")
                return openai_response

            except (json.JSONDecodeError, AttributeError) as e:
                logging.error(f"Failed to parse Gemini response: {str(e)}")
                return Response(
                    content=json.dumps({
                        "error": {
                            "message": f"Failed to process response: {str(e)}",
                            "type": "api_error",
                            "code": 500
                        }
                    }),
                    status_code=500,
                    media_type="application/json"
                )
        except Exception as e:
            logging.error(f"Non-streaming request failed: {str(e)}")
            return Response(
                content=json.dumps({
                    "error": {
                        "message": f"Request failed: {str(e)}",
                        "type": "api_error",
                        "code": 500
                    }
                }),
                status_code=500,
                media_type="application/json"
            )
242
+
243
@router.get("/v1/models")
async def openai_list_models(username: str = Depends(authenticate_user)):
    """
    OpenAI-compatible models endpoint.
    Returns available models in OpenAI format.
    """
    try:
        logging.info("OpenAI models list requested")

        # Local import: the supported-model catalog lives in config.
        from .config import SUPPORTED_MODELS

        def _to_openai_entry(gemini_model):
            # OpenAI model ids do not carry the "models/" prefix.
            model_id = gemini_model["name"].replace("models/", "")
            permission = {
                "id": "modelperm-" + model_id.replace("/", "-"),
                "object": "model_permission",
                "created": 1677610602,
                "allow_create_engine": False,
                "allow_sampling": True,
                "allow_logprobs": False,
                "allow_search_indices": False,
                "allow_view": True,
                "allow_fine_tuning": False,
                "organization": "*",
                "group": None,
                "is_blocking": False,
            }
            return {
                "id": model_id,
                "object": "model",
                "created": 1677610602,  # Static timestamp
                "owned_by": "google",
                "permission": [permission],
                "root": model_id,
                "parent": None,
            }

        openai_models = [_to_openai_entry(m) for m in SUPPORTED_MODELS]

        logging.info(f"Returning {len(openai_models)} models")
        return {
            "object": "list",
            "data": openai_models,
        }

    except Exception as e:
        logging.error(f"Failed to list models: {str(e)}")
        return Response(
            content=json.dumps({
                "error": {
                    "message": f"Failed to list models: {str(e)}",
                    "type": "api_error",
                    "code": 500,
                }
            }),
            status_code=500,
            media_type="application/json",
        )
+ )
304
+
305
+
src/openai_transformers.py ADDED
@@ -0,0 +1,260 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ OpenAI Format Transformers - Handles conversion between OpenAI and Gemini API formats.
3
+ This module contains all the logic for transforming requests and responses between the two formats.
4
+ """
5
+ import json
6
+ import time
7
+ import uuid
8
+ from typing import Dict, Any
9
+
10
+ from .models import OpenAIChatCompletionRequest, OpenAIChatCompletionResponse
11
+ from .config import (
12
+ DEFAULT_SAFETY_SETTINGS,
13
+ is_search_model,
14
+ get_base_model_name,
15
+ get_thinking_budget,
16
+ should_include_thoughts
17
+ )
18
+
19
+
20
def openai_request_to_gemini(openai_request: OpenAIChatCompletionRequest) -> Dict[str, Any]:
    """
    Transform an OpenAI chat completion request to Gemini format.

    Args:
        openai_request: OpenAI format request

    Returns:
        Dictionary in Gemini API format

    Notes:
        - OpenAI "assistant" maps to Gemini "model"; "system" is folded into
          "user" because this payload does not use systemInstruction.
        - Image parts must be base64 data URIs
          ("data:<mime>[;params];base64,<data>"). Unparseable image parts are
          skipped rather than failing the whole request.
    """
    contents = []

    # Process each message in the conversation
    for message in openai_request.messages:
        role = message.role

        # Map OpenAI roles to Gemini roles
        if role == "assistant":
            role = "model"
        elif role == "system":
            role = "user"  # Gemini treats system messages as user messages

        # Handle different content types (string vs list of parts)
        if isinstance(message.content, list):
            parts = []
            for part in message.content:
                if part.get("type") == "text":
                    parts.append({"text": part.get("text", "")})
                elif part.get("type") == "image_url":
                    image_url = part.get("image_url", {}).get("url")
                    if image_url:
                        # Parse data URI: "data:image/jpeg;base64,{base64_image}".
                        # Split only at the first "," so extra media-type
                        # parameters (e.g. ";charset=...") can never break the
                        # parse — the previous unbounded split(";") rejected
                        # those URIs via ValueError.
                        header, _, base64_data = image_url.partition(",")
                        if not header.startswith("data:") or not header.endswith("base64") or not base64_data:
                            continue  # not a base64 data URI we can inline
                        mime_type = header[len("data:"):].split(";", 1)[0]
                        parts.append({
                            "inlineData": {
                                "mimeType": mime_type,
                                "data": base64_data
                            }
                        })
            contents.append({"role": role, "parts": parts})
        else:
            # Simple text content
            contents.append({"role": role, "parts": [{"text": message.content}]})

    # Map OpenAI generation parameters to Gemini format
    generation_config = {}
    if openai_request.temperature is not None:
        generation_config["temperature"] = openai_request.temperature
    if openai_request.top_p is not None:
        generation_config["topP"] = openai_request.top_p
    if openai_request.max_tokens is not None:
        generation_config["maxOutputTokens"] = openai_request.max_tokens
    if openai_request.stop is not None:
        # Gemini supports stop sequences; normalize a bare string to a list.
        if isinstance(openai_request.stop, str):
            generation_config["stopSequences"] = [openai_request.stop]
        elif isinstance(openai_request.stop, list):
            generation_config["stopSequences"] = openai_request.stop
    if openai_request.frequency_penalty is not None:
        generation_config["frequencyPenalty"] = openai_request.frequency_penalty
    if openai_request.presence_penalty is not None:
        generation_config["presencePenalty"] = openai_request.presence_penalty
    if openai_request.n is not None:
        # n (number of completions) maps to Gemini's candidateCount
        generation_config["candidateCount"] = openai_request.n
    if openai_request.seed is not None:
        # Gemini supports seed for reproducible outputs
        generation_config["seed"] = openai_request.seed
    if openai_request.response_format is not None:
        # Handle JSON mode if specified
        if openai_request.response_format.get("type") == "json_object":
            generation_config["responseMimeType"] = "application/json"

    # Build the request payload
    request_payload = {
        "contents": contents,
        "generationConfig": generation_config,
        "safetySettings": DEFAULT_SAFETY_SETTINGS,
        "model": get_base_model_name(openai_request.model)  # Use base model name for API call
    }

    # Add Google Search grounding for search models
    if is_search_model(openai_request.model):
        request_payload["tools"] = [{"googleSearch": {}}]

    # Add thinking configuration for thinking models
    thinking_budget = get_thinking_budget(openai_request.model)
    if thinking_budget is not None:
        request_payload["generationConfig"]["thinkingConfig"] = {
            "thinkingBudget": thinking_budget,
            "includeThoughts": should_include_thoughts(openai_request.model)
        }

    return request_payload
123
+
124
+
125
def gemini_response_to_openai(gemini_response: Dict[str, Any], model: str) -> Dict[str, Any]:
    """
    Transform a Gemini API response to OpenAI chat completion format.

    Args:
        gemini_response: Response from Gemini API
        model: Model name echoed back in the response

    Returns:
        Dictionary in OpenAI chat completion format
    """
    choices = []

    for candidate in gemini_response.get("candidates", []):
        content_block = candidate.get("content", {})

        # Gemini's "model" role corresponds to OpenAI's "assistant".
        role = content_block.get("role", "assistant")
        if role == "model":
            role = "assistant"

        # Split part texts into visible output vs. thinking ("thought") tokens.
        visible, thoughts = [], []
        for part in content_block.get("parts", []):
            text = part.get("text")
            if not text:
                continue
            bucket = thoughts if part.get("thought", False) else visible
            bucket.append(text)

        message = {
            "role": role,
            "content": "".join(visible),
        }
        # Only attach reasoning_content when thinking tokens were present.
        if thoughts:
            message["reasoning_content"] = "".join(thoughts)

        choices.append({
            "index": candidate.get("index", 0),
            "message": message,
            "finish_reason": _map_finish_reason(candidate.get("finishReason")),
        })

    return {
        "id": str(uuid.uuid4()),
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": choices,
    }
183
+
184
+
185
def gemini_stream_chunk_to_openai(gemini_chunk: Dict[str, Any], model: str, response_id: str) -> Dict[str, Any]:
    """
    Transform a Gemini streaming response chunk to OpenAI streaming format.

    Args:
        gemini_chunk: Single chunk from Gemini streaming response
        model: Model name echoed back in the chunk
        response_id: Consistent ID shared by every chunk of one response

    Returns:
        Dictionary in OpenAI streaming ("chat.completion.chunk") format
    """
    choices = []

    for candidate in gemini_chunk.get("candidates", []):
        content_block = candidate.get("content", {})

        # Split part texts into visible output vs. thinking ("thought") tokens.
        # (OpenAI stream deltas carry no role field, so the candidate role is
        # intentionally not emitted here.)
        visible, thoughts = [], []
        for part in content_block.get("parts", []):
            text = part.get("text")
            if not text:
                continue
            bucket = thoughts if part.get("thought", False) else visible
            bucket.append(text)

        # Build delta object, omitting empty fields.
        delta = {}
        if visible:
            delta["content"] = "".join(visible)
        if thoughts:
            delta["reasoning_content"] = "".join(thoughts)

        choices.append({
            "index": candidate.get("index", 0),
            "delta": delta,
            "finish_reason": _map_finish_reason(candidate.get("finishReason")),
        })

    return {
        "id": response_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": choices,
    }
241
+
242
+
243
+ def _map_finish_reason(gemini_reason: str) -> str:
244
+ """
245
+ Map Gemini finish reasons to OpenAI finish reasons.
246
+
247
+ Args:
248
+ gemini_reason: Finish reason from Gemini API
249
+
250
+ Returns:
251
+ OpenAI-compatible finish reason
252
+ """
253
+ if gemini_reason == "STOP":
254
+ return "stop"
255
+ elif gemini_reason == "MAX_TOKENS":
256
+ return "length"
257
+ elif gemini_reason in ["SAFETY", "RECITATION"]:
258
+ return "content_filter"
259
+ else:
260
+ return None
src/utils.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import platform
2
+ from .config import CLI_VERSION
3
+
4
def get_user_agent():
    """Generate User-Agent string matching gemini-cli format."""
    return f"GeminiCLI/{CLI_VERSION} ({platform.system()}; {platform.machine()})"
10
+
11
def get_platform_string():
    """Generate platform string matching gemini-cli format."""
    system = platform.system().upper()
    arch = platform.machine().upper()
    on_arm = arch in ("ARM64", "AARCH64")

    # Map to gemini-cli platform format; anything unrecognized is UNSPECIFIED.
    if system == "DARWIN":
        return "DARWIN_ARM64" if on_arm else "DARWIN_AMD64"
    if system == "LINUX":
        return "LINUX_ARM64" if on_arm else "LINUX_AMD64"
    if system == "WINDOWS":
        return "WINDOWS_AMD64"
    return "PLATFORM_UNSPECIFIED"
31
+
32
def get_client_metadata(project_id=None):
    """Client metadata block attached to Code Assist API calls.

    ``duetProject`` is None when no project ID is supplied.
    """
    return dict(
        ideType="IDE_UNSPECIFIED",
        platform=get_platform_string(),
        pluginType="GEMINI",
        duetProject=project_id,
    )