aukaru committed on
Commit
5c5b371
·
verified ·
1 Parent(s): 07e9c20

Upload 236 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +215 -0
  2. .gitattributes +35 -1
  3. .gitignore +11 -0
  4. .husky/pre-push +4 -0
  5. .prettierrc +13 -0
  6. .windsurfrules +33 -0
  7. CODEBASE_GUIDE.md +321 -0
  8. README.md +72 -10
  9. data/.gitignore +2 -0
  10. data/user-files/.gitkeep +0 -0
  11. docker/ci/.gitlab-ci.yml +21 -0
  12. docker/ci/Dockerfile +22 -0
  13. docker/docker-compose-selfhost.yml +17 -0
  14. docker/huggingface/Dockerfile +15 -0
  15. docker/render/Dockerfile +26 -0
  16. docs/assets/aws-request-model-access.png +0 -0
  17. docs/assets/huggingface-createspace.png +3 -0
  18. docs/assets/huggingface-dockerfile.png +0 -0
  19. docs/assets/huggingface-savedockerfile.png +0 -0
  20. docs/assets/openapi-admin-users.yaml +245 -0
  21. docs/aws-configuration.md +58 -0
  22. docs/azure-configuration.md +30 -0
  23. docs/dall-e-configuration.md +71 -0
  24. docs/deploy-huggingface.md +104 -0
  25. docs/deploy-render.md +56 -0
  26. docs/gcp-configuration.md +35 -0
  27. docs/logging-sheets.md +61 -0
  28. docs/pow-captcha.md +135 -0
  29. docs/self-hosting.md +150 -0
  30. docs/user-management.md +85 -0
  31. docs/user-quotas.md +36 -0
  32. http-client.env.json +9 -0
  33. package-lock.json +0 -0
  34. package.json +96 -0
  35. patches/README.md +23 -0
  36. patches/http-proxy+1.18.1.patch +13 -0
  37. public/css/reset.css +349 -0
  38. public/css/sakura-dark.css +231 -0
  39. public/css/sakura.css +237 -0
  40. public/js/hash-worker.js +120 -0
  41. render.yaml +10 -0
  42. scripts/migrate.ts +39 -0
  43. scripts/oai-reverse-proxy.http +309 -0
  44. scripts/seed-events.ts +102 -0
  45. scripts/test-aws-signing.ts +118 -0
  46. scripts/test-concurrency.js +45 -0
  47. scripts/test-queue.js +53 -0
  48. src/admin/api/events.ts +49 -0
  49. src/admin/api/users.ts +117 -0
  50. src/admin/auth.ts +54 -0
.env.example ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # To customize your server, make a copy of this file to `.env` and edit any
2
+ # values you want to change. Be sure to remove the `#` at the beginning of each
3
+ # line you want to modify.
4
+
5
+ # All values have reasonable defaults, so you only need to change the ones you
6
+ # want to override.
7
+
8
+ # Use production mode unless you are developing locally.
9
+ NODE_ENV=production
10
+
11
+ # Detail level of diagnostic logging. (trace | debug | info | warn | error)
12
+ # LOG_LEVEL=info
13
+
14
+ # ------------------------------------------------------------------------------
15
+ # General settings:
16
+
17
+ # The title displayed on the info page.
18
+ # SERVER_TITLE=Coom Tunnel
19
+
20
+ # URL for the image displayed on the login page.
21
+ # If not set, no image will be displayed.
22
+ # LOGIN_IMAGE_URL=https://example.com/your-logo.png
23
+
24
+ # Whether to enable the token-based or password-based login for the main info page.
25
+ # Defaults to true. Set to false to disable login and make the info page public.
26
+ # ENABLE_INFO_PAGE_LOGIN=true
27
+
28
+ # Authentication mode for the service info page. (token | password)
29
+ # If 'token', any valid user token is used (requires GATEKEEPER='user_token' mode).
30
+ # If 'password', SERVICE_INFO_PASSWORD is used.
31
+ # Defaults to 'token' if ENABLE_INFO_PAGE_LOGIN is true.
32
+ # SERVICE_INFO_AUTH_MODE=token
33
+
34
+ # Password for the service info page if SERVICE_INFO_AUTH_MODE is 'password'.
35
+ # SERVICE_INFO_PASSWORD=your-service-info-password
36
+
37
+ # The route name used to proxy requests to APIs, relative to the Web site root.
38
+ # PROXY_ENDPOINT_ROUTE=/proxy
39
+
40
+ # Text model requests allowed per minute per user.
41
+ # TEXT_MODEL_RATE_LIMIT=4
42
+ # Image model requests allowed per minute per user.
43
+ # IMAGE_MODEL_RATE_LIMIT=2
44
+
45
+ # Max number of context tokens a user can request at once.
46
 + # Increase this if your proxy allows GPT 32k or 128k context
47
+ # MAX_CONTEXT_TOKENS_OPENAI=32768
48
+ # MAX_CONTEXT_TOKENS_ANTHROPIC=32768
49
+
50
+ # Max number of output tokens a user can request at once.
51
+ # MAX_OUTPUT_TOKENS_OPENAI=1024
52
+ # MAX_OUTPUT_TOKENS_ANTHROPIC=1024
53
+
54
+ # Whether to show the estimated cost of consumed tokens on the info page.
55
+ # SHOW_TOKEN_COSTS=false
56
+
57
+ # Whether to automatically check API keys for validity.
58
+ # Disabled by default in local development mode, but enabled in production.
59
+ # CHECK_KEYS=true
60
+
61
+ # Which model types users are allowed to access.
62
+ # The following model families are recognized:
63
+ # turbo | gpt4 | gpt4-32k | gpt4-turbo | gpt4o | o1 | dall-e | claude
64
+ # | claude-opus | gemini-flash | gemini-pro | gemini-ultra | mistral-tiny |
65
+ # | mistral-small | mistral-medium | mistral-large | aws-claude |
66
+ # | aws-claude-opus | gcp-claude | gcp-claude-opus | azure-turbo | azure-gpt4
67
+ # | azure-gpt4-32k | azure-gpt4-turbo | azure-gpt4o | azure-o1 | azure-dall-e
68
+ # | azure-gpt45 | azure-o1-mini | azure-o3-mini | deepseek | xai | o3 | o4-mini | gpt41 | gpt41-mini | gpt41-nano
69
+ # By default, all models are allowed
70
 + # To disallow any, uncomment the line below and edit
71
+ # ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt45,gpt4-turbo,gpt4o,claude,claude-opus,gemini-flash,gemini-pro,gemini-ultra,mistral-tiny,mistral-small,mistral-medium,mistral-large,aws-claude,aws-claude-opus,gcp-claude,gcp-claude-opus,azure-turbo,azure-gpt4,azure-gpt4-32k,azure-gpt4-turbo,azure-gpt4o,azure-gpt45,azure-o1-mini,azure-o3-mini,deepseek
72
+
73
+ # Which services can be used to process prompts containing images via multimodal
74
+ # models. The following services are recognized:
75
+ # openai | anthropic | aws | gcp | azure | google-ai | mistral-ai | xai
76
+ # Do not enable this feature unless all users are trusted, as you will be liable
77
+ # for any user-submitted images containing illegal content.
78
+ # By default, no image services are allowed and image prompts are rejected.
79
+ # ALLOWED_VISION_SERVICES=
80
+
81
+ # Whether prompts should be logged to Google Sheets.
82
+ # Requires additional setup. See `docs/google-sheets.md` for more information.
83
+ # PROMPT_LOGGING=false
84
+
85
+ # Specifies the number of proxies or load balancers in front of the server.
86
+ # For Cloudflare or Hugging Face deployments, the default of 1 is correct.
87
+ # For any other deployments, please see config.ts as the correct configuration
88
+ # depends on your setup. Misconfiguring this value can result in problems
89
+ # accurately tracking IP addresses and enforcing rate limits.
90
+ # TRUSTED_PROXIES=1
91
+
92
+ # Whether cookies should be set without the Secure flag, for hosts that don't
93
+ # support SSL. True by default in development, false in production.
94
+ # USE_INSECURE_COOKIES=false
95
+
96
+ # Reorganizes requests in the queue according to their token count, placing
97
+ # larger prompts further back. The penalty is determined by (promptTokens *
98
+ # TOKENS_PUNISHMENT_FACTOR). A value of 1.0 adds one second per 1000 tokens.
99
+ # When there is no queue or it is very short, the effect is negligible (this
100
+ # setting only reorders the queue, it does not artificially delay requests).
101
+ # TOKENS_PUNISHMENT_FACTOR=0.0
102
+
103
+ # Captcha verification settings. Refer to docs/pow-captcha.md for guidance.
104
+ # CAPTCHA_MODE=none
105
+ # POW_TOKEN_HOURS=24
106
+ # POW_TOKEN_MAX_IPS=2
107
+ # POW_DIFFICULTY_LEVEL=low
108
+ # POW_CHALLENGE_TIMEOUT=30
109
+
110
+ # -------------------------------------------------------------------------------
111
+ # Blocking settings:
112
+ # Allows blocking requests depending on content, referers, or IP addresses.
113
+ # This is a convenience feature; if you need more robust functionality it is
114
+ # highly recommended to put this application behind nginx or Cloudflare, as they
115
+ # will have better performance.
116
+
117
+ # IP addresses or CIDR blocks from which requests will be blocked.
118
+ # IP_BLACKLIST=10.0.0.1/24
119
+ # URLs from which requests will be blocked.
120
+ # BLOCKED_ORIGINS=reddit.com,9gag.com
121
+ # Message to show when requests are blocked.
122
+ # BLOCK_MESSAGE="You must be over the age of majority in your country to use this service."
123
+ # Destination to redirect blocked requests to.
124
+ # BLOCK_REDIRECT="https://roblox.com/"
125
+ # Comma-separated list of phrases that will be rejected. Surround phrases with
126
+ # quotes if they contain commas. You can use regular expression tokens.
127
 + # Avoid overly broad phrases, as they will trigger on any match in the entire prompt.
128
+ # REJECT_PHRASES="phrase one,phrase two,"phrase three, which has a comma",phrase four"
129
+ # Message to show when requests are rejected.
130
+ # REJECT_MESSAGE="You can't say that here."
131
+
132
+ # ------------------------------------------------------------------------------
133
+ # Optional settings for user management, access control, and quota enforcement:
134
+ # See `docs/user-management.md` for more information and setup instructions.
135
+ # See `docs/user-quotas.md` to learn how to set up quotas.
136
+
137
+ # Which access control method to use. (none | proxy_key | user_token)
138
+ # GATEKEEPER=none
139
+ # Which persistence method to use. (memory | firebase_rtdb | sqlite)
140
+ # GATEKEEPER_STORE=memory
141
+ # If using sqlite store, path to the SQLite database file for user data.
142
+ # Defaults to data/user-store.sqlite in the project directory.
143
+ # SQLITE_USER_STORE_PATH=data/user-store.sqlite3
144
+
145
+ # Maximum number of unique IPs a user can connect from. (0 for unlimited)
146
+ # MAX_IPS_PER_USER=0
147
+ # Whether user_tokens should be automatically disabled when reaching the IP limit.
148
+ # MAX_IPS_AUTO_BAN=true
149
+
150
+ # With user_token gatekeeper, whether to allow users to change their nickname.
151
+ # ALLOW_NICKNAME_CHANGES=true
152
+
153
+ # Default token quotas for each model family. (0 for unlimited)
154
+ # Specify as TOKEN_QUOTA_MODEL_FAMILY=value (replacing dashes with underscores).
155
+ # eg. TOKEN_QUOTA_TURBO=0, TOKEN_QUOTA_GPT4=1000000, TOKEN_QUOTA_GPT4_32K=100000
156
+ # "Tokens" for image-generation models are counted at a rate of 100000 tokens
157
+ # per US$1.00 generated, which is similar to the cost of GPT-4 Turbo.
158
+ # DALL-E 3 costs around US$0.10 per image (10000 tokens).
159
+ # See `docs/dall-e-configuration.md` for more information.
160
+ # TOKEN_QUOTA_DALL_E=0
161
+
162
+ # How often to refresh token quotas. (hourly | daily)
163
+ # Leave unset to never automatically refresh quotas.
164
+ # QUOTA_REFRESH_PERIOD=daily
165
+
166
+ # -------------------------------------------------------------------------------
167
+ # HTTP agent settings:
168
+ # If you need to change how the proxy makes requests to other servers, such
169
+ # as when checking keys or forwarding users' requests to external services,
170
+ # you can configure an alternative HTTP agent. Otherwise the default OS settings
171
+ # will be used.
172
+
173
+ # The name of the network interface to use. The first external IPv4 address
174
+ # belonging to this interface will be used for outgoing requests.
175
+ # HTTP_AGENT_INTERFACE=enp0s3
176
+
177
+ # The URL of a proxy server to use. Supports SOCKS4, SOCKS5, HTTP, and HTTPS.
178
+ # Note that if your proxy server issues a self-signed certificate, you may need
179
+ # NODE_EXTRA_CA_CERTS set to the path to your certificate. You will need to set
180
+ # that variable in your environment, not in this file.
181
+ # HTTP_AGENT_PROXY_URL=http://test:test@127.0.0.1:8000
182
+
183
+ # ------------------------------------------------------------------------------
184
+ # Secrets and keys:
185
 + # For Huggingface, set them via the Secrets section in your Space's config UI. Do not set them in .env.
186
+ # For Render, create a "secret file" called .env using the Environment tab.
187
+
188
+ # You can add multiple API keys by separating them with a comma.
189
+ # For AWS credentials, separate the access key ID, secret key, and region with a colon.
190
+ # For GCP credentials, separate the project ID, client email, region, and private key with a colon.
191
+ OPENAI_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
192
+ ANTHROPIC_KEY=sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
193
+ GOOGLE_AI_KEY=AIzaxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
194
+ # See `docs/aws-configuration.md` for more information, there may be additional steps required to set up AWS.
195
+ AWS_CREDENTIALS=myaccesskeyid:mysecretkey:us-east-1,anotheraccesskeyid:anothersecretkey:us-west-2
196
+ # See `docs/azure-configuration.md` for more information, there may be additional steps required to set up Azure.
197
+ AZURE_CREDENTIALS=azure-resource-name:deployment-id:api-key,another-azure-resource-name:another-deployment-id:another-api-key
198
+ GCP_CREDENTIALS=project-id:client-email:region:private-key
199
+
200
+ # With proxy_key gatekeeper, the password users must provide to access the API.
201
+ # PROXY_KEY=your-secret-key
202
+
203
+ # With user_token gatekeeper, the admin password used to manage users.
204
+ # ADMIN_KEY=your-very-secret-key
205
+ # Restrict access to the admin interface to specific IP addresses, specified
206
+ # as a comma-separated list of CIDR ranges.
207
+ # ADMIN_WHITELIST=0.0.0.0/0
208
+
209
+ # With firebase_rtdb gatekeeper storage, the Firebase project credentials.
210
+ # FIREBASE_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
211
+ # FIREBASE_RTDB_URL=https://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.firebaseio.com
212
+
213
+ # With prompt logging, the Google Sheets credentials.
214
+ # GOOGLE_SHEETS_SPREADSHEET_ID=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
215
+ # GOOGLE_SHEETS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
.gitattributes CHANGED
@@ -1 +1,35 @@
1
- nonono-main.zip filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ docs/assets/huggingface-createspace.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .aider*
2
+ .env*
3
+ !.env.vault
4
+ .venv
5
+ .vscode
6
+ .idea
7
+ build
8
+ greeting.md
9
+ node_modules
10
+ .windsurfrules
11
+ http-client.private.env.json
.husky/pre-push ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ #!/usr/bin/env sh
2
+ . "$(dirname -- "$0")/_/husky.sh"
3
+
4
+ npm run type-check
.prettierrc ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "plugins": ["prettier-plugin-ejs"],
3
+ "overrides": [
4
+ {
5
+ "files": "*.ejs",
6
+ "options": {
7
+ "printWidth": 120,
8
+ "bracketSameLine": true
9
+ }
10
+ }
11
+ ],
12
+ "trailingComma": "es5"
13
+ }
.windsurfrules ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are a Senior Full Stack Developer and an Expert in ReactJS, NextJS, JavaScript, TypeScript, HTML, CSS and modern UI/UX frameworks (e.g., TailwindCSS, Shadcn, Radix). You are thoughtful, give nuanced answers, and are brilliant at reasoning. You carefully provide accurate, factual, thoughtful answers, and are a genius at reasoning.
2
+
3
+ - Follow the user’s requirements carefully & to the letter.
4
+ - First think step-by-step - describe your plan for what to build in pseudocode, written out in great detail.
5
+ - Confirm, then write code!
6
 + - Always write correct, best-practice, DRY-principle (Don't Repeat Yourself), bug-free, fully functional and working code; it should also be aligned with the rules listed below in Code Implementation Guidelines.
7
 + - Focus on easy-to-read, readable code over being performant.
8
+ - Fully implement all requested functionality.
9
+ - Leave NO todo’s, placeholders or missing pieces.
10
+ - Ensure code is complete! Verify thoroughly finalised.
11
+ - Include all required imports, and ensure proper naming of key components.
12
 + - Be concise. Minimize any other prose.
13
+ - If you think there might not be a correct answer, you say so.
14
+ - If you do not know the answer, say so, instead of guessing.
15
+
16
+ ### Coding Environment
17
+ The user asks questions about the following coding languages:
18
+ - ReactJS
19
+ - NextJS
20
+ - JavaScript
21
+ - TypeScript
22
+ - TailwindCSS
23
+ - HTML
24
+ - CSS
25
+
26
+ ### Code Implementation Guidelines
27
+ Follow these rules when you write code:
28
+ - Use early returns whenever possible to make the code more readable.
29
+ - Always use Tailwind classes for styling HTML elements; avoid using CSS or tags.
30
 + - Use “class:” instead of the ternary operator in class tags whenever possible.
31
+ - Use descriptive variable and function/const names. Also, event functions should be named with a “handle” prefix, like “handleClick” for onClick and “handleKeyDown” for onKeyDown.
32
+ - Implement accessibility features on elements. For example, a tag should have a tabindex=“0”, aria-label, on:click, and on:keydown, and similar attributes.
33
+ - Use consts instead of functions, for example, “const toggle = () =>”. Also, define a type if possible.
CODEBASE_GUIDE.md ADDED
@@ -0,0 +1,321 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project Codebase Guide
2
+
3
+ This document serves as a guide and index for the project codebase, designed to help developers and AI agents quickly understand its structure, components, and how to contribute.
4
+
5
+ ## Table of Contents
6
+
7
+ 1. [Project Overview](#project-overview)
8
+ 2. [Directory Structure](#directory-structure)
9
+ 3. [Core Components](#core-components)
10
+ * [Configuration (`src/config.ts`)](#configuration)
11
+ * [Server Entry Point (`src/server.ts`)](#server-entry-point)
12
+ * [Proxy Layer (`src/proxy/`)](#proxy-layer)
13
+ * [User Management (`src/user/`)](#user-management)
14
+ * [Admin Interface (`src/admin/`)](#admin-interface)
15
+ * [Shared Utilities (`src/shared/`)](#shared-utilities)
16
+ 4. [Proxy Functionality](#proxy-functionality)
17
+ * [Routing (`src/proxy/routes.ts`)](#proxy-routing)
18
+ * [Supported Models & Providers](#supported-models--providers)
19
+ * [Middleware (`src/proxy/middleware/`)](#proxy-middleware)
20
+ * [Adding New Models](#adding-new-models)
21
+ * [Adding New APIs/Providers](#adding-new-apisproviders)
22
+ 5. [Model Management](#model-management)
23
+ * [Model Family Definitions](#model-family-definitions)
24
+ * [Adding OpenAI Models](#adding-openai-models)
25
+ * [Model Mapping & Routing](#model-mapping--routing)
26
+ * [Service Information](#service-information)
27
+ * [Step-by-Step Guide for Adding a New Model](#step-by-step-guide-for-adding-a-new-model)
28
+ * [Model Patterns and Versioning](#model-patterns-and-versioning)
29
+ * [Response Format Handling](#response-format-handling)
30
+ 6. [Key Management](#key-management)
31
+ * [Key Pool System](#key-pool-system)
32
+ * [Provider-Specific Key Management](#provider-specific-key-management)
33
+ * [Key Rotation and Health Checks](#key-rotation-and-health-checks)
34
+ 7. [Data Management](#data-management)
35
+ * [Database (`src/shared/database/`)](#database)
36
+ * [File Storage (`src/shared/file-storage/`)](#file-storage)
37
+ 8. [Authentication & Authorization](#authentication--authorization)
38
+ 9. [Logging & Monitoring](#logging--monitoring)
39
+ 10. [Deployment](#deployment)
40
+ 11. [Contributing](#contributing)
41
+
42
+ ## Project Overview
43
+
44
+ This project provides a proxy layer for various Large Language Models (LLMs) and potentially other AI APIs. It aims to offer a unified interface, manage API keys securely, handle rate limiting, usage tracking, and potentially add features like response caching or prompt modification.
45
+
46
+ ## Directory Structure
47
+
48
+ ```
49
+ .
50
+ ├── .env.example # Example environment variables
51
+ ├── .gitattributes # Git attributes
52
+ ├── .gitignore # Git ignore rules
53
+ ├── .husky/ # Git hooks
54
+ ├── .prettierrc # Code formatting rules
55
+ ├── CODEBASE_GUIDE.md # This file
56
+ ├── README.md # Project README
57
+ ├── data/ # Data files (e.g., SQLite DB)
58
+ ├── docker/ # Docker configuration
59
+ ├── docs/ # Documentation files
60
+ ├── http-client.env.json # HTTP client environment
61
+ ├── package-lock.json # NPM lock file
62
+ ├── package.json # Project dependencies and scripts
63
+ ├── patches/ # Patches for dependencies
64
+ ├── public/ # Static assets served by the web server
65
+ ├── render.yaml # Render deployment configuration
66
+ ├── scripts/ # Utility scripts
67
+ ├── src/ # Source code
68
+ │ ├── admin/ # Admin interface logic
69
+ │ ├── config.ts # Application configuration
70
+ │ ├── info-page.ts # Logic for the info page
71
+ │ ├── logger.ts # Logging setup
72
+ │ ├── proxy/ # Core proxy logic for different providers
73
+ │ ├── server.ts # Express server setup and main entry point
74
+ │ ├── service-info.ts # Service information logic
75
+ │ ├── shared/ # Shared utilities, types, and modules
76
+ │ └── user/ # User management logic
77
+ ├── tsconfig.json # TypeScript configuration
78
+ ```
79
+
80
+ ## Core Components
81
+
82
+ ### Configuration (`src/config.ts`)
83
+
84
+ * Loads environment variables and defines application settings.
85
+ * Contains configuration for database connections, API keys (placeholders/retrieval methods), logging levels, rate limits, etc.
86
+ * Uses `dotenv` and potentially a schema validation library (like Zod) to ensure required variables are present.
87
+
88
+ ### Server Entry Point (`src/server.ts`)
89
+
90
+ * Initializes the Express application.
91
+ * Sets up core middleware (e.g., body parsing, CORS, logging).
92
+ * Mounts routers for different parts of the application (admin, user, proxy).
93
+ * Starts the HTTP server.
94
+
95
+ ### Proxy Layer (`src/proxy/`)
96
+
97
+ * The heart of the application, handling requests to downstream AI APIs.
98
+ * Contains individual modules for each supported provider (e.g., `openai.ts`, `anthropic.ts`).
99
+ * Handles request transformation, authentication against the target API, and response handling.
100
+ * Uses middleware for common proxy tasks.
101
+
102
+ ### User Management (`src/user/`)
103
+
104
+ * Handles user registration, login, session management, and potentially API key generation/management for end-users.
105
+ * Likely interacts with the database (`src/shared/database/`).
106
+
107
+ ### Admin Interface (`src/admin/`)
108
+
109
+ * Provides an interface for administrators to manage users, monitor usage, configure settings, etc.
110
+ * May have its own set of routes and views.
111
+
112
+ ### Shared Utilities (`src/shared/`)
113
+
114
+ * Contains reusable code across different modules.
115
+ * `api-schemas/`: Zod schemas for API request/response validation.
116
+ * `database/`: Database connection, schemas (e.g., Prisma), and query logic.
117
+ * `errors.ts`: Custom error classes.
118
+ * `key-management/`: Logic for managing API keys (if applicable).
119
+ * `models.ts`: Core data models/types used throughout the application.
120
+ * `prompt-logging/`: Logic for logging prompts and responses.
121
+ * `tokenization/`: Utilities for counting tokens.
122
+ * `utils.ts`: General utility functions.
123
+
124
+ ## Proxy Functionality
125
+
126
+ ### Proxy Routing (`src/proxy/routes.ts`)
127
+
128
+ * Defines the API endpoints for the proxy service (e.g., `/v1/chat/completions`).
129
+ * Maps incoming requests to the appropriate provider-specific handler based on the request path, headers, or body content (e.g., model requested).
130
+ * Applies relevant middleware (authentication, rate limiting, queuing, etc.).
131
+
132
+ ### Supported Models & Providers
133
+
134
+ * **OpenAI:** Handled in `src/proxy/openai.ts`. Supports models like GPT-4, GPT-3.5-turbo, as well as o-series models (o1, o1-mini, o1-pro, o3, o3-mini, o3-pro, o4-mini). Handles chat completions and potentially image generation (`src/proxy/openai-image.ts`).
135
+ * **Anthropic:** Handled in `src/proxy/anthropic.ts`. Supports Claude models. May use AWS Bedrock (`src/proxy/aws-claude.ts`) or Anthropic's direct API.
136
+ * **Google AI / Vertex AI:** Handled in `src/proxy/google-ai.ts` and `src/proxy/gcp.ts`. Supports Gemini models (gemini-flash, gemini-pro, gemini-ultra).
137
+ * **Mistral AI:** Handled in `src/proxy/mistral-ai.ts`. Supports Mistral models via their API or potentially AWS (`src/proxy/aws-mistral.ts`).
138
+ * **Azure OpenAI:** Handled in `src/proxy/azure.ts`. Provides an alternative endpoint for OpenAI models via Azure.
139
+ * **Deepseek:** Handled in `src/proxy/deepseek.ts`.
140
+ * **Xai:** Handled in `src/proxy/xai.ts`.
141
+ * **AWS (General):** `src/proxy/aws.ts` might contain shared AWS logic (e.g., authentication).
142
+
143
+ ### Middleware (`src/proxy/middleware/`)
144
+
145
+ * **`gatekeeper.ts`:** Likely handles initial request validation, authentication, and authorization checks before hitting provider logic. Checks origin (`check-origin.ts`), potentially custom tokens (`check-risu-token.ts`).
146
+ * **`rate-limit.ts`:** Implements rate limiting logic, potentially per-user or per-key.
147
+ * **`queue.ts`:** Manages request queuing, possibly to handle concurrency limits or prioritize requests.
148
+
149
+ ### Adding New Models
150
+
151
+ 1. **Identify the Provider:** Determine if the new model belongs to an existing provider (e.g., a new OpenAI model) or a new one.
152
+ 2. **Update Provider Logic (if existing):**
153
+ * Modify the relevant provider file (e.g., `src/proxy/openai.ts`).
154
+ * Update model lists or logic that selects/validates models.
155
+ * Adjust any request/response transformations if the new model has a different API schema.
156
+ * Update model information in shared files like `src/shared/models.ts` if necessary.
157
+ 3. **Update Routing (if necessary):** Modify `src/proxy/routes.ts` if the new model requires a different endpoint or routing logic.
158
+ 4. **Configuration:** Add any new API keys or configuration parameters to `.env.example` and `src/config.ts`.
159
+ 5. **Testing:** Add unit or integration tests for the new model.
160
+
161
+ ### Adding New APIs/Providers
162
+
163
+ 1. **Create Provider Module:** Create a new file in `src/proxy/` (e.g., `src/proxy/new-provider.ts`).
164
+ 2. **Implement Handler:**
165
+ * Write the core logic to handle requests for this provider. This typically involves:
166
+ * Receiving the standardized request from the router.
167
+ * Transforming the request into the format expected by the new provider's API.
168
+ * Authenticating with the new provider's API (fetching keys from config).
169
+ * Making the API call (consider using a robust HTTP client like `axios` or `node-fetch`).
170
+ * Handling streaming responses if applicable (using helpers from `src/shared/streaming.ts`).
171
+ * Transforming the provider's response back into a standardized format.
172
+ * Handling errors gracefully.
173
+ 3. **Add Routing:**
174
+ * Import the new handler in `src/proxy/routes.ts`.
175
+ * Add new routes or modify existing routing logic to direct requests to the new handler based on model name, path, or other criteria.
176
+ * Apply necessary middleware (gatekeeper, rate limiter, queue).
177
+ 4. **Create Key Management:**
178
+ * Create a new directory in `src/shared/key-management/` for the provider.
179
+ * Implement provider-specific key management (key checkers, token counters).
180
+ 5. **Configuration:**
181
+ * Add configuration variables (API keys, base URLs) to `.env.example` and `src/config.ts`.
182
+ * Update `src/config.ts` to load and validate the new variables.
183
+ 6. **Model Information:** Add details about the new provider and its models to `src/shared/models.ts` or similar shared locations.
184
+ 7. **Tokenization (if applicable):** If token counting is needed, add or update tokenization logic in `src/shared/tokenization/`.
185
+ 8. **Testing:** Implement thorough tests for the new provider integration.
186
+ 9. **Documentation:** Update this guide and any other relevant documentation.
187
+
188
+ ## Model Management
189
+
190
+ ### Model Family Definitions
191
+
192
+ * **Model Family Definitions:** The project uses a family-based approach to group similar models together. These are defined in `src/shared/models.ts`.
193
+ * Each model is part of a model family (e.g., "gpt4", "claude", "gemini-pro") which helps with routing, key management, and feature support.
194
+ * The `MODEL_FAMILIES` array contains all supported model families, and the `MODEL_FAMILY_SERVICE` mapping connects each family to its provider service.
195
+
196
+ ### Adding OpenAI Models
197
+
198
+ When adding new OpenAI models to the codebase, there are several files that must be updated:
199
+
200
+ 1. **Update Model Types (`src/shared/models.ts`):**
201
+ - Add the new model to the `OpenAIModelFamily` type
202
+ - Add the model to the `MODEL_FAMILIES` array
203
+ - Add the Azure variants for the model if applicable
204
+ - Add the model to `MODEL_FAMILY_SERVICE` mapping
205
+ - Update `OPENAI_MODEL_FAMILY_MAP` with regex patterns to match the model names
206
+
207
+ 2. **Update Context Size Limits (`src/proxy/middleware/request/preprocessors/validate-context-size.ts`):**
208
+ - Add regex matching for the new model
209
+ - Set the appropriate context token limit for the model
210
+
211
+ 3. **Update Token Cost Tracking (`src/shared/stats.ts`):**
212
+ - Add pricing information for the new model in the `getTokenCostUsd` function
213
+ - Include both input and output prices in the comments for clarity
214
+
215
+ 4. **Update Feature Support Checks (`src/proxy/openai.ts`):**
216
+ - If the model supports special features like the reasoning API parameter (`isO1Model` function), update the appropriate function
217
+ - For model feature detection, prefer using regex patterns over explicit lists when possible, as this handles date-stamped versions better
218
+
219
+ 5. **Update Display Names (`src/info-page.ts`):**
220
+ - Add friendly display names for the new models in the `MODEL_FAMILY_FRIENDLY_NAME` object
221
+
222
+ 6. **Update Key Management Provider Files:**
223
+ - For OpenAI keys in `src/shared/key-management/openai/provider.ts`, add token counters for the new models
224
+ - For Azure OpenAI keys in `src/shared/key-management/azure/provider.ts`, add token counters for the Azure versions
225
+
226
+ ### Model Patterns and Versioning
227
+
228
+ The codebase handles several patterns for model naming and versioning:
229
+
230
+ 1. **Date-stamped Models:** Many models include date stamps (e.g., `gpt-4-0125-preview`). The regex patterns in `OPENAI_MODEL_FAMILY_MAP` account for these with patterns like `^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$`.
231
+
232
+ 2. **O-Series Models:** OpenAI's o-series models (o1, o1-mini, o1-pro, o3, o3-mini, o3-pro, o4-mini) follow a different naming convention. The codebase handles these with dedicated model families and regex patterns.
233
+
234
+ 3. **Preview/Non-Preview Variants:** Some models have preview variants (e.g., `gpt-4.5-preview`). The regex patterns in `OPENAI_MODEL_FAMILY_MAP` account for these with patterns like `^gpt-4\\.5(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$`.
235
+
236
+ When adding new models, try to follow the existing patterns for consistency.
237
+
238
+ ### Response Format Handling
239
+
240
+ The codebase includes special handling for different API response formats:
241
+
242
+ 1. **Chat vs. Text Completions:** There's transformation logic in `openai.ts` to convert between chat completions and text completions formats (`transformTurboInstructResponse`).
243
+
244
+ 2. **Newer API Formats:** For newer APIs like the Responses API, there's transformation logic (`transformResponsesApiResponse`) to convert responses to a format compatible with existing clients.
245
+
246
+ When adding support for new models or APIs, consider whether transformation is needed to maintain compatibility with existing clients.
247
+
248
+ ## Key Management
249
+
250
+ ### Key Pool System
251
+
252
+ The project uses a sophisticated key pool system (`src/shared/key-management/key-pool.ts`) to manage API keys for different providers. Key features include:
253
+
254
+ * **Key Selection:** The system selects the appropriate key based on model family, region preferences, and other criteria.
255
+ * **Rotation:** Keys are rotated to distribute usage and avoid hitting rate limits.
256
+ * **Health Checks:** Keys are checked periodically to ensure they're still valid and within rate limits.
257
+
258
+ ### Provider-Specific Key Management
259
+
260
+ Each provider has its own key management module in `src/shared/key-management/`:
261
+
262
+ * **Key Checkers:** Each provider implements key checkers to validate keys and check their status.
263
+ * **Token Counters:** Providers implement token counting logic specific to their pricing model.
264
+ * **Models Support:** Keys are associated with specific model families they support.
265
+
266
+ When adding a new model or provider, you'll need to update or create the appropriate key management files.
267
+
268
+ ### Key Rotation and Health Checks
269
+
270
+ The key pool system includes logic for:
271
+
272
+ * **Rotation Strategy:** Keys are selected based on a prioritization strategy (`prioritize-keys.ts`).
273
+ * **Disabling Unhealthy Keys:** Keys that fail health checks are temporarily disabled.
274
+ * **Rate Limit Awareness:** The system tracks usage to avoid hitting provider rate limits.
275
+
276
+ ## Data Management
277
+
278
+ ### Database (`src/shared/database/`)
279
+
280
+ * Likely uses Prisma or a similar ORM.
281
+ * Defines database schemas (e.g., for users, API keys, usage logs).
282
+ * Provides functions for interacting with the database.
283
+ * Configuration is managed in `src/config.ts`.
284
+
285
+ ### File Storage (`src/shared/file-storage/`)
286
+
287
+ * May be used for storing logs, cached data, or user-uploaded files.
288
+ * Could integrate with local storage or cloud providers (e.g., S3, GCS).
289
+
290
+ ## Authentication & Authorization
291
+
292
+ * **User Auth:** Handled in `src/user/` potentially using sessions (`src/shared/with-session.ts`) or JWTs.
293
+ * **Proxy Auth:** The `gatekeeper.ts` middleware likely verifies incoming requests to the proxy endpoints. This could involve checking:
294
+ * Custom API keys stored in the database (`src/shared/database/`).
295
+ * Specific tokens (`check-risu-token.ts`).
296
+ * HMAC signatures (`src/shared/hmac-signing.ts`).
297
+ * Origin checks (`check-origin.ts`).
298
+ * **Downstream Auth:** Each provider module (`src/proxy/*.ts`) handles authentication with the actual AI service API using keys from the configuration.
299
+
300
+ ## Logging & Monitoring
301
+
302
+ * **Logging:** Configured in `src/logger.ts`, likely using a library like `pino` or `winston`. Logs requests, errors, and important events.
303
+ * **Prompt Logging:** Specific logic for logging prompts and responses might exist in `src/shared/prompt-logging/`.
304
+ * **Stats/Monitoring:** `src/shared/stats.ts` might handle collecting and exposing application metrics.
305
+
306
+ ## Deployment
307
+
308
+ * **Docker:** The project likely includes Docker configuration for containerized deployment.
309
+ * **Render:** The `render.yaml` file suggests the project is or can be deployed on Render.
310
+ * **Environment Variables:** The `.env.example` file provides a template for required environment variables in production.
311
+
312
+ ## Contributing
313
+
314
+ When contributing to this project:
315
+
316
+ 1. **Follow Coding Standards:** Use the established patterns and standards in the codebase. The `.prettierrc` file defines code formatting rules.
317
+ 2. **Update Documentation:** Keep this guide updated when adding new components or changing existing ones.
318
+ 3. **Add Tests:** Ensure your changes are tested appropriately.
319
+ 4. **Update Configuration:** If your changes require new environment variables, update `.env.example`.
320
+
321
+ *This guide provides a high-level overview. For detailed information, refer to the specific source code files.*
README.md CHANGED
@@ -1,10 +1,72 @@
1
- ---
2
- title: '344'
3
- emoji: 🏃
4
- colorFrom: green
5
- colorTo: gray
6
- sdk: static
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OAI Reverse Proxy - just a shitty fork
2
+ Reverse proxy server for various LLM APIs.
3
+
4
+ ### Table of Contents
5
+ <!-- TOC -->
6
+ * [OAI Reverse Proxy](#oai-reverse-proxy)
7
+ * [Table of Contents](#table-of-contents)
8
+ * [What is this?](#what-is-this)
9
+ * [Features](#features)
10
+ * [Usage Instructions](#usage-instructions)
11
+ * [Personal Use (single-user)](#personal-use-single-user)
12
+ * [Updating](#updating)
13
+ * [Local Development](#local-development)
14
+ * [Self-hosting](#self-hosting)
15
+ * [Building](#building)
16
+ * [Forking](#forking)
17
+ <!-- TOC -->
18
+
19
+ ## What is this?
20
+ This project allows you to run a reverse proxy server for various LLM APIs.
21
+
22
+ ## Features
23
+ - [x] Support for multiple APIs
24
+ - [x] [OpenAI](https://openai.com/)
25
+ - [x] [Anthropic](https://www.anthropic.com/)
26
+ - [x] [AWS Bedrock](https://aws.amazon.com/bedrock/) (Claude4 is fucked, don't care)
27
+ - [x] [Vertex AI (GCP)](https://cloud.google.com/vertex-ai/)
28
+ - [x] [Google MakerSuite/Gemini API](https://ai.google.dev/)
29
+ - [x] [Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service)
30
+ - [x] Translation from OpenAI-formatted prompts to any other API, including streaming responses
31
+ - [x] Multiple API keys with rotation and rate limit handling
32
+ - [x] Basic user management
33
+ - [x] Simple role-based permissions
34
+ - [x] Per-model token quotas
35
+ - [x] Temporary user accounts
36
+ - [x] Event audit logging
37
+ - [x] Optional full logging of prompts and completions
38
+ - [x] Abuse detection and prevention
39
+ - [x] IP address and user token model invocation rate limits
40
+ - [x] IP blacklists
41
+ - [x] Proof-of-work challenge for access by anonymous users
42
+
43
+ ## Usage Instructions
44
+ If you'd like to run your own instance of this server, you'll need to deploy it somewhere and configure it with your API keys. A few easy options are provided below, though you can also deploy it to any other service you'd like if you know what you're doing and the service supports Node.js.
45
+
46
+ ### Personal Use (single-user)
47
+ If you just want to run the proxy server to use yourself without hosting it for others:
48
+ 1. Install [Node.js](https://nodejs.org/en/download/) >= 18.0.0
49
+ 2. Clone this repository
50
+ 3. Create a `.env` file in the root of the project and add your API keys. See the [.env.example](./.env.example) file for an example.
51
+ 4. Install dependencies with `npm install`
52
+ 5. Run `npm run build`
53
+ 6. Run `npm start`
54
+
55
+ #### Updating
56
+ You must re-run `npm install` and `npm run build` whenever you pull new changes from the repository.
57
+
58
+ #### Local Development
59
+ Use `npm run start:dev` to run the proxy in development mode with watch mode enabled. Use `npm run type-check` to run the type checker across the project.
60
+
61
+ ### Self-hosting
62
+ [See here for instructions on how to self-host the application on your own VPS or local machine and expose it to the internet for others to use.](./docs/self-hosting.md)
63
+
64
+ **Ensure you set the `TRUSTED_PROXIES` environment variable according to your deployment.** Refer to [.env.example](./.env.example) and [config.ts](./src/config.ts) for more information.
65
+
66
+ ## Building
67
+ To build the project, run `npm run build`. This will compile the TypeScript code to JavaScript and output it to the `build` directory. You should run this whenever you pull new changes from the repository.
68
+
69
+ Note that if you are trying to build the server on a very memory-constrained (<= 1GB) VPS, you may need to run the build with `NODE_OPTIONS=--max_old_space_size=2048 npm run build` to avoid running out of memory during the build process, assuming you have swap enabled. The application itself should run fine on a 512MB VPS for most reasonable traffic levels.
70
+
71
+ ## Forking
72
+ If you are forking the repository on GitGud, you may wish to disable GitLab CI/CD or you will be spammed with emails about failed builds due to not having any CI runners. You can do this by going to *Settings > General > Visibility, project features, permissions* and then disabling the "CI/CD" feature.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *
2
+ !.gitkeep
data/user-files/.gitkeep ADDED
File without changes
docker/ci/.gitlab-ci.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ stages:
2
+ - build
3
+
4
+ build_image:
5
+ stage: build
6
+ image:
7
+ name: gcr.io/kaniko-project/executor:debug
8
+ entrypoint: [""]
9
+ script:
10
+ - |
11
+ if [ "$CI_COMMIT_REF_NAME" = "main" ]; then
12
+ TAG="latest"
13
+ else
14
+ TAG=$CI_COMMIT_REF_NAME
15
+ fi
16
+ - echo "Building image with tag $TAG"
17
+ - BASE64_AUTH=$(echo -n "$DOCKER_HUB_USERNAME:$DOCKER_HUB_ACCESS_TOKEN" | base64)
18
+ - echo "{\"auths\":{\"https://index.docker.io/v1/\":{\"auth\":\"$BASE64_AUTH\"}}}" > /kaniko/.docker/config.json
19
+ - /kaniko/executor --context $CI_PROJECT_DIR --dockerfile $CI_PROJECT_DIR/docker/ci/Dockerfile --destination docker.io/khanonci/oai-reverse-proxy:$TAG --build-arg CI_COMMIT_REF_NAME=$CI_COMMIT_REF_NAME --build-arg CI_COMMIT_SHA=$CI_COMMIT_SHA --build-arg CI_PROJECT_PATH=$CI_PROJECT_PATH
20
+ only:
21
+ - main
docker/ci/Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM node:18-bullseye-slim
2
+
3
+ WORKDIR /app
4
+ COPY . .
5
+
6
+ RUN npm ci
7
+ RUN npm run build
8
+ RUN npm prune --production
9
+
10
+ EXPOSE 7860
11
+ ENV PORT=7860
12
+ ENV NODE_ENV=production
13
+
14
+ ARG CI_COMMIT_REF_NAME
15
+ ARG CI_COMMIT_SHA
16
+ ARG CI_PROJECT_PATH
17
+
18
+ ENV GITGUD_BRANCH=$CI_COMMIT_REF_NAME
19
+ ENV GITGUD_COMMIT=$CI_COMMIT_SHA
20
+ ENV GITGUD_PROJECT=$CI_PROJECT_PATH
21
+
22
+ CMD [ "npm", "start" ]
docker/docker-compose-selfhost.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Before running this, create a .env and greeting.md file.
2
+ # Refer to .env.example for the required environment variables.
3
+ # User-generated content is stored in the data directory.
4
+ # When self-hosting, it's recommended to run this behind a reverse proxy like
5
+ # nginx or Caddy to handle SSL/TLS and rate limiting. Refer to
6
+ # docs/self-hosting.md for more information and an example nginx config.
7
+ version: '3.8'
8
+ services:
9
+ oai-reverse-proxy:
10
+ image: khanonci/oai-reverse-proxy:latest
11
+ ports:
12
+ - "127.0.0.1:7860:7860"
13
+ env_file:
14
+ - ./.env
15
+ volumes:
16
+ - ./greeting.md:/app/greeting.md
17
+ - ./data:/app/data
docker/huggingface/Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM node:18-bullseye-slim
2
+ RUN apt-get update && \
3
+ apt-get install -y git
4
+ RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
5
+ WORKDIR /app
6
+ RUN chown -R 1000:1000 /app
7
+ USER 1000
8
+ RUN npm install
9
+ COPY Dockerfile greeting.md* .env* ./
10
+ RUN npm run build
11
+ EXPOSE 7860
12
+ ENV NODE_ENV=production
13
+ # Huggingface free VMs have 16GB of RAM so we can be greedy
14
+ ENV NODE_OPTIONS="--max-old-space-size=12882"
15
+ CMD [ "npm", "start" ]
docker/render/Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # syntax = docker/dockerfile:1.2
2
+
3
+ FROM node:18-bullseye-slim
4
+ RUN apt-get update && \
5
+ apt-get install -y curl
6
+
7
+ # Unlike Huggingface, Render can only deploy straight from a git repo and
8
+ # doesn't allow you to create or modify arbitrary files via the web UI.
9
+ # To use a greeting file, set `GREETING_URL` to a URL that points to a raw
10
+ # text file containing your greeting, such as a GitHub Gist.
11
+
12
+ # You may need to clear the build cache if you change the greeting, otherwise
13
+ # Render will use the cached layer from the previous build.
14
+
15
+ WORKDIR /app
16
+ ARG GREETING_URL
17
+ RUN if [ -n "$GREETING_URL" ]; then \
18
+ curl -sL "$GREETING_URL" > greeting.md; \
19
+ fi
20
+ COPY . .
21
+ RUN npm install
22
+ RUN npm run build
23
+ RUN --mount=type=secret,id=_env,dst=/etc/secrets/.env cat /etc/secrets/.env >> .env
24
+ EXPOSE 10000
25
+ ENV NODE_ENV=production
26
+ CMD [ "npm", "start" ]
docs/assets/aws-request-model-access.png ADDED
docs/assets/huggingface-createspace.png ADDED

Git LFS Details

  • SHA256: dcb1aa84ef28c7bd228776d8c3fcafa7b006b2f52f4fe2da7b5e9a6e4c9b0931
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
docs/assets/huggingface-dockerfile.png ADDED
docs/assets/huggingface-savedockerfile.png ADDED
docs/assets/openapi-admin-users.yaml ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ openapi: 3.0.0
3
+ info:
4
+ version: 1.0.0
5
+ title: User Management API
6
+ paths:
7
+ /admin/users:
8
+ get:
9
+ summary: List all users
10
+ operationId: getUsers
11
+ responses:
12
+ "200":
13
+ description: A list of users
14
+ content:
15
+ application/json:
16
+ schema:
17
+ type: object
18
+ properties:
19
+ users:
20
+ type: array
21
+ items:
22
+ $ref: "#/components/schemas/User"
23
+ count:
24
+ type: integer
25
+ format: int32
26
+ post:
27
+ summary: Create a new user
28
+ operationId: createUser
29
+ requestBody:
30
+ content:
31
+ application/json:
32
+ schema:
33
+ oneOf:
34
+ - type: object
35
+ properties:
36
+ type:
37
+ type: string
38
+ enum: ["normal", "special"]
39
+ - type: object
40
+ properties:
41
+ type:
42
+ type: string
43
+ enum: ["temporary"]
44
+ expiresAt:
45
+ type: integer
46
+ format: int64
47
+ tokenLimits:
48
+ $ref: "#/components/schemas/TokenCount"
49
+ responses:
50
+ "200":
51
+ description: The created user's token
52
+ content:
53
+ application/json:
54
+ schema:
55
+ type: object
56
+ properties:
57
+ token:
58
+ type: string
59
+ put:
60
+ summary: Bulk upsert users
61
+ operationId: bulkUpsertUsers
62
+ requestBody:
63
+ content:
64
+ application/json:
65
+ schema:
66
+ type: object
67
+ properties:
68
+ users:
69
+ type: array
70
+ items:
71
+ $ref: "#/components/schemas/User"
72
+ responses:
73
+ "200":
74
+ description: The upserted users
75
+ content:
76
+ application/json:
77
+ schema:
78
+ type: object
79
+ properties:
80
+ upserted_users:
81
+ type: array
82
+ items:
83
+ $ref: "#/components/schemas/User"
84
+ count:
85
+ type: integer
86
+ format: int32
87
+ "400":
88
+ description: Bad request
89
+ content:
90
+ application/json:
91
+ schema:
92
+ type: object
93
+ properties:
94
+ error:
95
+ type: string
96
+
97
+ /admin/users/{token}:
98
+ get:
99
+ summary: Get a user by token
100
+ operationId: getUser
101
+ parameters:
102
+ - name: token
103
+ in: path
104
+ required: true
105
+ schema:
106
+ type: string
107
+ responses:
108
+ "200":
109
+ description: A user
110
+ content:
111
+ application/json:
112
+ schema:
113
+ $ref: "#/components/schemas/User"
114
+ "404":
115
+ description: Not found
116
+ content:
117
+ application/json:
118
+ schema:
119
+ type: object
120
+ properties:
121
+ error:
122
+ type: string
123
+ put:
124
+ summary: Update a user by token
125
+ operationId: upsertUser
126
+ parameters:
127
+ - name: token
128
+ in: path
129
+ required: true
130
+ schema:
131
+ type: string
132
+ requestBody:
133
+ content:
134
+ application/json:
135
+ schema:
136
+ $ref: "#/components/schemas/User"
137
+ responses:
138
+ "200":
139
+ description: The updated user
140
+ content:
141
+ application/json:
142
+ schema:
143
+ $ref: "#/components/schemas/User"
144
+ "400":
145
+ description: Bad request
146
+ content:
147
+ application/json:
148
+ schema:
149
+ type: object
150
+ properties:
151
+ error:
152
+ type: string
153
+ delete:
154
+ summary: Disables the user with the given token
155
+ description: Optionally accepts a `disabledReason` query parameter. Returns the disabled user.
156
+ parameters:
157
+ - in: path
158
+ name: token
159
+ required: true
160
+ schema:
161
+ type: string
162
+ description: The token of the user to disable
163
+ - in: query
164
+ name: disabledReason
165
+ required: false
166
+ schema:
167
+ type: string
168
+ description: The reason for disabling the user
169
+ responses:
170
+ '200':
171
+ description: The disabled user
172
+ content:
173
+ application/json:
174
+ schema:
175
+ $ref: '#/components/schemas/User'
176
+ '400':
177
+ description: Bad request
178
+ content:
179
+ application/json:
180
+ schema:
181
+ type: object
182
+ properties:
183
+ error:
184
+ type: string
185
+ '404':
186
+ description: Not found
187
+ content:
188
+ application/json:
189
+ schema:
190
+ type: object
191
+ properties:
192
+ error:
193
+ type: string
194
+ components:
195
+ schemas:
196
+ TokenCount:
197
+ type: object
198
+ properties:
199
+ turbo:
200
+ type: integer
201
+ format: int32
202
+ gpt4:
203
+ type: integer
204
+ format: int32
205
+ "gpt4-32k":
206
+ type: integer
207
+ format: int32
208
+ claude:
209
+ type: integer
210
+ format: int32
211
+ User:
212
+ type: object
213
+ properties:
214
+ token:
215
+ type: string
216
+ ip:
217
+ type: array
218
+ items:
219
+ type: string
220
+ nickname:
221
+ type: string
222
+ type:
223
+ type: string
224
+ enum: ["normal", "special"]
225
+ promptCount:
226
+ type: integer
227
+ format: int32
228
+ tokenLimits:
229
+ $ref: "#/components/schemas/TokenCount"
230
+ tokenCounts:
231
+ $ref: "#/components/schemas/TokenCount"
232
+ createdAt:
233
+ type: integer
234
+ format: int64
235
+ lastUsedAt:
236
+ type: integer
237
+ format: int64
238
+ disabledAt:
239
+ type: integer
240
+ format: int64
241
+ disabledReason:
242
+ type: string
243
+ expiresAt:
244
+ type: integer
245
+ format: int64
docs/aws-configuration.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuring the proxy for AWS Bedrock
2
+
3
+ The proxy supports AWS Bedrock models via the `/proxy/aws/claude` endpoint. There are a few extra steps necessary to use AWS Bedrock compared to the other supported APIs.
4
+
5
+ - [Setting keys](#setting-keys)
6
+ - [Attaching policies](#attaching-policies)
7
+ - [Provisioning models](#provisioning-models)
8
+ - [Note regarding logging](#note-regarding-logging)
9
+
10
+ ## Setting keys
11
+
12
+ Use the `AWS_CREDENTIALS` environment variable to set the AWS API keys.
13
+
14
+ Like other APIs, you can provide multiple keys separated by commas. Each AWS key, however, is a set of credentials including the access key, secret key, and region. These are separated by a colon (`:`).
15
+
16
+ For example:
17
+
18
+ ```
19
+ AWS_CREDENTIALS=AKIA000000000000000:somesecretkey:us-east-1,AKIA111111111111111:anothersecretkey:us-west-2
20
+ ```
21
+
22
+ ## Attaching policies
23
+
24
+ Unless your credentials belong to the root account, the principal will need to be granted the following permissions:
25
+
26
+ - `bedrock:InvokeModel`
27
+ - `bedrock:InvokeModelWithResponseStream`
28
+ - `bedrock:GetModelInvocationLoggingConfiguration`
29
+ - The proxy needs this to determine whether prompt/response logging is enabled. By default, the proxy won't use credentials unless it can conclusively determine that logging is disabled, for privacy reasons.
30
+
31
+ Use the IAM console or the AWS CLI to attach these policies to the principal associated with the credentials.
32
+
33
+ ## Provisioning models
34
+
35
+ AWS does not automatically provide accounts with access to every model. You will need to provision the models you want to use, in the regions you want to use them in. You can do this from the AWS console.
36
+
37
+ ⚠️ **Models are region-specific.** Currently AWS only offers Claude in a small number of regions. Switch to the AWS region you want to use, then go to the models page and request access to **Anthropic / Claude**.
38
+
39
+ ![](./assets/aws-request-model-access.png)
40
+
41
+ Access is generally granted more or less instantly. Once your account has access, you can enable the model by checking the box next to it.
42
+
43
+ You can also request Claude Instant, but support for this isn't fully implemented yet.
44
+
45
+ ### Supported model IDs
46
+ Users can send these model IDs to the proxy to invoke the corresponding models.
47
+ - **Claude**
48
+ - `anthropic.claude-v1` (~18k context, claude 1.3 -- EOL 2024-02-28)
49
+ - `anthropic.claude-v2` (~100k context, claude 2.0)
50
+ - `anthropic.claude-v2:1` (~200k context, claude 2.1)
51
+ - **Claude Instant**
52
+ - `anthropic.claude-instant-v1` (~100k context, claude instant 1.2)
53
+
54
+ ## Note regarding logging
55
+
56
+ By default, the proxy will refuse to use keys if it finds that logging is enabled, or if it doesn't have permission to check logging status.
57
+
58
+ If you can't attach the `bedrock:GetModelInvocationLoggingConfiguration` policy to the principal, you can set the `ALLOW_AWS_LOGGING` environment variable to `true` to force the proxy to use the keys anyway. A warning will appear on the info page when this is enabled.
docs/azure-configuration.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuring the proxy for Azure
2
+
3
+ The proxy supports Azure OpenAI Service via the `/proxy/azure/openai` endpoint. The process of setting it up is slightly different from regular OpenAI.
4
+
5
+ - [Setting keys](#setting-keys)
6
+ - [Model assignment](#model-assignment)
7
+
8
+ ## Setting keys
9
+
10
+ Use the `AZURE_CREDENTIALS` environment variable to set the Azure API keys.
11
+
12
+ Like other APIs, you can provide multiple keys separated by commas. Each Azure key, however, is a set of values including the Resource Name, Deployment ID, and API key. These are separated by a colon (`:`).
13
+
14
+ For example:
15
+ ```
16
+ AZURE_CREDENTIALS=contoso-ml:gpt4-8k:0123456789abcdef0123456789abcdef,northwind-corp:testdeployment:0123456789abcdef0123456789abcdef
17
+ ```
18
+
19
+ ## Model assignment
20
+ Note that each Azure deployment is assigned a model when you create it in the Azure OpenAI Service portal. If you want to use a different model, you'll need to create a new deployment, and therefore a new key to be added to the AZURE_CREDENTIALS environment variable. Each credential only grants access to one model.
21
+
22
+ ### Supported model IDs
23
+ Users can send normal OpenAI model IDs to the proxy to invoke the corresponding models. For the most part they work the same with Azure. GPT-3.5 Turbo has an ID of "gpt-35-turbo" because Azure doesn't allow periods in model names, but the proxy should automatically convert this to the correct ID.
24
+
25
+ As noted above, you can only use model IDs for which a deployment has been created and added to the proxy.
26
+
27
+ ## On content filtering
28
+ Be aware that all Azure OpenAI Service deployments have content filtering enabled by default at a Medium level. Prompts or responses which are deemed to be inappropriate will be rejected by the API. This is a feature of the Azure OpenAI Service and not the proxy.
29
+
30
+ You can disable this from the deployment's settings within Azure, but you would need to request an exemption from Microsoft for your organization first. See [this page](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/content-filters) for more information.
docs/dall-e-configuration.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuring the proxy for DALL-E
2
+
3
+ The proxy supports DALL-E 2 and DALL-E 3 image generation via the `/proxy/openai-images` endpoint. By default it is disabled as it is somewhat expensive and potentially more open to abuse than text generation.
4
+
5
+ - [Updating your Dockerfile](#updating-your-dockerfile)
6
+ - [Enabling DALL-E](#enabling-dall-e)
7
+ - [Setting quotas](#setting-quotas)
8
+ - [Rate limiting](#rate-limiting)
9
+
10
+ ## Updating your Dockerfile
11
+ If you are using a previous version of the Dockerfile supplied with the proxy, it doesn't have the necessary permissions to let the proxy save temporary files.
12
+
13
+ You can replace the entire thing with the new Dockerfile at [./docker/huggingface/Dockerfile](../docker/huggingface/Dockerfile) (or the equivalent for Render deployments).
14
+
15
+ You can also modify your existing Dockerfile; just add the following lines after the `WORKDIR` line:
16
+
17
+ ```Dockerfile
18
+ # Existing
19
+ RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
20
+ WORKDIR /app
21
+
22
+ # Take ownership of the app directory and switch to the non-root user
23
+ RUN chown -R 1000:1000 /app
24
+ USER 1000
25
+
26
+ # Existing
27
+ RUN npm install
28
+ ```
29
+
30
+ ## Enabling DALL-E
31
+ Add `dall-e` to the `ALLOWED_MODEL_FAMILIES` environment variable to enable DALL-E. For example:
32
+
33
+ ```
34
+ # GPT3.5 Turbo, GPT-4, GPT-4 Turbo, and DALL-E
35
+ ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-turbo,dall-e
36
+
37
+ # All models as of this writing
38
+ ALLOWED_MODEL_FAMILIES=turbo,gpt4,gpt4-32k,gpt4-turbo,claude,gemini-pro,aws-claude,dall-e
39
+ ```
40
+
41
+ Refer to [.env.example](../.env.example) for a full list of supported model families. You can add `dall-e` to that list to enable all models.
42
+
43
+ ## Setting quotas
44
+ DALL-E doesn't bill by token like text generation models. Instead there is a fixed cost per image generated, depending on the model, image size, and selected quality.
45
+
46
+ The proxy still uses tokens to set quotas for users. The cost for each generated image will be converted to "tokens" at a rate of 100000 tokens per US$1.00. This works out to a similar cost-per-token as GPT-4 Turbo, so you can use similar token quotas for both.
47
+
48
+ Use `TOKEN_QUOTA_DALL_E` to set the default quota for image generation. Otherwise it works the same as token quotas for other models.
49
+
50
+ ```
51
+ # ~50 standard DALL-E images per refresh period, or US$2.00
52
+ TOKEN_QUOTA_DALL_E=200000
53
+ ```
54
+
55
+ Refer to [https://openai.com/pricing](https://openai.com/pricing) for the latest pricing information. As of this writing, the cheapest DALL-E 3 image costs $0.04 per generation, which works out to 4000 tokens. Higher resolution and quality settings can cost up to $0.12 per image, or 12000 tokens.
56
+
57
+ ## Rate limiting
58
+ The old `MODEL_RATE_LIMIT` setting has been split into `TEXT_MODEL_RATE_LIMIT` and `IMAGE_MODEL_RATE_LIMIT`. Whatever value you previously set for `MODEL_RATE_LIMIT` will be used for text models.
59
+
60
+ If you don't specify an `IMAGE_MODEL_RATE_LIMIT`, it defaults to half of the `TEXT_MODEL_RATE_LIMIT`, to a minimum of 1 image per minute.
61
+
62
+ ```
63
+ # 4 text generations per minute, 2 images per minute
64
+ TEXT_MODEL_RATE_LIMIT=4
65
+ IMAGE_MODEL_RATE_LIMIT=2
66
+ ```
67
+
68
+ If a prompt is filtered by OpenAI's content filter, it won't count towards the rate limit.
69
+
70
+ ## Hiding recent images
71
+ By default, the proxy shows the last 12 recently generated images by users. You can hide this section by setting `SHOW_RECENT_IMAGES` to `false`.
docs/deploy-huggingface.md ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deploy to Huggingface Space
2
+
3
+ **⚠️ This method is no longer recommended. Please use the [self-hosting instructions](./self-hosting.md) instead.**
4
+
5
+ This repository can be deployed to a [Huggingface Space](https://huggingface.co/spaces). This is a free service that allows you to run a simple server in the cloud. You can use it to safely share your OpenAI API key with a friend.
6
+
7
+ ### 1. Get an API key
8
+ - Go to [OpenAI](https://openai.com/) and sign up for an account. You can use a free trial key for this as long as you provide SMS verification.
9
+ - Claude is not publicly available yet, but if you have access to it via the [Anthropic](https://www.anthropic.com/) closed beta, you can also use that key with the proxy.
10
+
11
+ ### 2. Create an empty Huggingface Space
12
+ - Go to [Huggingface](https://huggingface.co/) and sign up for an account.
13
+ - Once logged in, [create a new Space](https://huggingface.co/new-space).
14
+ - Provide a name for your Space and select "Docker" as the SDK. Select "Blank" for the template.
15
+ - Click "Create Space" and wait for the Space to be created.
16
+
17
+ ![Create Space](assets/huggingface-createspace.png)
18
+
19
+ ### 3. Create an empty Dockerfile
20
+ - Once your Space is created, you'll see an option to "Create the Dockerfile in your browser". Click that link.
21
+
22
+ ![Create Dockerfile](assets/huggingface-dockerfile.png)
23
+ - Paste the following into the text editor and click "Save".
24
+ ```dockerfile
25
+ FROM node:18-bullseye-slim
26
+ RUN apt-get update && \
27
+ apt-get install -y git
28
+ RUN git clone https://gitgud.io/khanon/oai-reverse-proxy.git /app
29
+ WORKDIR /app
30
+ RUN chown -R 1000:1000 /app
31
+ USER 1000
32
+ RUN npm install
33
+ COPY Dockerfile greeting.md* .env* ./
34
+ RUN npm run build
35
+ EXPOSE 7860
36
+ ENV NODE_ENV=production
37
+ ENV NODE_OPTIONS="--max-old-space-size=12882"
38
+ CMD [ "npm", "start" ]
39
+ ```
40
+ - Click "Commit new file to `main`" to save the Dockerfile.
41
+
42
+ ![Commit](assets/huggingface-savedockerfile.png)
43
+
44
+ ### 4. Set your API key as a secret
45
+ - Click the Settings button in the top right corner of your repository.
46
+ - Scroll down to the `Repository Secrets` section and click `New Secret`.
47
+
48
+ ![Secrets](https://files.catbox.moe/irrp2p.png)
49
+
50
+ - Enter `OPENAI_KEY` as the name and your OpenAI API key as the value.
51
+ - For Claude, set `ANTHROPIC_KEY` instead.
52
+ - You can use both types of keys at the same time if you want.
53
+
54
+ ![New Secret](https://files.catbox.moe/ka6s1a.png)
55
+
56
+ ### 5. Deploy the server
57
+ - Your server should automatically deploy when you add the secret, but if not you can select `Factory Reboot` from that same Settings menu.
58
+
59
+ ### 6. Share the link
60
+ - The Service Info section below should show the URL for your server. You can share this with anyone to safely give them access to your API key.
61
+ - Your friend doesn't need any API key of their own, they just need your link.
62
+
63
+ # Optional
64
+
65
+ ## Updating the server
66
+
67
+ To update your server, go to the Settings menu and select `Factory Reboot`. This will pull the latest version of the code from GitHub and restart the server.
68
+
69
+ Note that if you just perform a regular Restart, the server will be restarted with the same code that was running before.
70
+
71
+ ## Adding a greeting message
72
+
73
+ You can create a Markdown file called `greeting.md` to display a message on the Server Info page. This is a good place to put instructions for how to use the server.
74
+
75
+ ## Customizing the server
76
+
77
+ The server will be started with some default configuration, but you can override it by adding a `.env` file to your Space. You can use Huggingface's web editor to create a new `.env` file alongside your Dockerfile. Huggingface will restart your server automatically when you save the file.
78
+
79
+ Here are some example settings:
80
+ ```shell
81
+ # Requests per minute per IP address
82
+ MODEL_RATE_LIMIT=4
83
+ # Max tokens to request from OpenAI
84
+ MAX_OUTPUT_TOKENS_OPENAI=256
85
+ # Max tokens to request from Anthropic (Claude)
86
+ MAX_OUTPUT_TOKENS_ANTHROPIC=512
87
+ # Block prompts containing disallowed characters
88
+ REJECT_DISALLOWED=false
89
+ REJECT_MESSAGE="This content violates /aicg/'s acceptable use policy."
90
+ ```
91
+
92
+ See `.env.example` for a full list of available settings, or check `config.ts` for details on what each setting does.
93
+
94
+ ## Restricting access to the server
95
+
96
+ If you want to restrict access to the server, you can set a `PROXY_KEY` secret. This key will need to be passed in the Authentication header of every request to the server, just like an OpenAI API key. Set the `GATEKEEPER` mode to `proxy_key`, and then set the `PROXY_KEY` variable to whatever password you want.
97
+
98
+ Add this using the same method as the OPENAI_KEY secret above. Don't add this to your `.env` file because that file is public and anyone can see it.
99
+
100
+ Example:
101
+ ```
102
+ GATEKEEPER=proxy_key
103
+ PROXY_KEY=your_secret_password
104
+ ```
docs/deploy-render.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deploy to Render.com
2
+
3
+ **⚠️ This method is no longer supported or recommended and may not work. Please use the [self-hosting instructions](./self-hosting.md) instead.**
4
+
5
+ Render.com offers a free tier that includes 750 hours of compute time per month. This is enough to run a single proxy instance 24/7. Instances shut down after 15 minutes without traffic but start up again automatically when a request is received. You can use something like https://app.checklyhq.com/ to ping your proxy every 15 minutes to keep it alive.
6
+
7
+ ### 1. Create account
8
+ - [Sign up for Render.com](https://render.com/) to create an account and access the dashboard.
9
+
10
+ ### 2. Create a service using a Blueprint
11
+ Render allows you to deploy and automatically configure a repository containing a [render.yaml](../render.yaml) file using its Blueprints feature. This is the easiest way to get started.
12
+
13
+ - Click the **Blueprints** tab at the top of the dashboard.
14
+ - Click **New Blueprint Instance**.
15
+ - Under **Public Git repository**, enter `https://gitlab.com/khanon/oai-proxy`.
16
+ - Note that this is not the GitGud repository, but a mirror on GitLab.
17
+ - Click **Continue**.
18
+ - Under **Blueprint Name**, enter a name.
19
+ - Under **Branch**, enter `main`.
20
+ - Click **Apply**.
21
+
22
+ The service will be created according to the instructions in the `render.yaml` file. Don't wait for it to complete as it will fail due to missing environment variables. Instead, proceed to the next step.
23
+
24
+ ### 3. Set environment variables
25
+ - Return to the **Dashboard** tab.
26
+ - Click the name of the service you just created, which may show as "Deploy failed".
27
+ - Click the **Environment** tab.
28
+ - Click **Add Secret File**.
29
+ - Under **Filename**, enter `.env`.
30
+ - Under **Contents**, enter all of your environment variables, one per line, in the format `NAME=value`.
31
+ - For example, `OPENAI_KEY=sk-abc123`.
32
+ - Click **Save Changes**.
33
+
34
+ **IMPORTANT:** Set `TRUSTED_PROXIES=3`, otherwise users' IP addresses will not be recorded correctly (the server will see the IP address of Render's load balancer instead of the user's real IP address).
35
+
36
+ The service will automatically rebuild and deploy with the new environment variables. This will take a few minutes. The link to your deployed proxy will appear at the top of the page.
37
+
38
+ If you want to change the URL, go to the **Settings** tab of your Web Service and click the **Edit** button next to **Name**. You can also set a custom domain, though I haven't tried this yet.
39
+
40
+ # Optional
41
+
42
+ ## Updating the server
43
+
44
+ To update your server, go to the page for your Web Service and click **Manual Deploy** > **Deploy latest commit**. This will pull the latest version of the code and redeploy the server.
45
+
46
+ _If you have trouble with this, you can also try selecting **Clear build cache & deploy** instead from the same menu._
47
+
48
+ ## Adding a greeting message
49
+
50
+ To show a greeting message on the Server Info page, set the `GREETING_URL` environment variable within Render to the URL of a Markdown file. This URL should point to a raw text file, not an HTML page. You can use a public GitHub Gist or GitLab Snippet for this. For example: `GREETING_URL=https://gitlab.com/-/snippets/2542011/raw/main/greeting.md`. You can change the title of the page by setting the `SERVER_TITLE` environment variable.
51
+
52
+ Don't set `GREETING_URL` in the `.env` secret file you created earlier; it must be set in Render's environment variables section for it to work correctly.
53
+
54
+ ## Customizing the server
55
+
56
+ You can customize the server by editing the `.env` configuration you created earlier. Refer to [.env.example](../.env.example) for a list of all available configuration options. Further information can be found in the [config.ts](../src/config.ts) file.
docs/gcp-configuration.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Configuring the proxy for Vertex AI (GCP)
2
+
3
+ The proxy supports GCP models via the `/proxy/gcp/claude` endpoint. There are a few extra steps necessary to use GCP compared to the other supported APIs.
4
+
5
+ - [Setting keys](#setting-keys)
6
+ - [Setup Vertex AI](#setup-vertex-ai)
7
+ - [Supported model IDs](#supported-model-ids)
8
+
9
+ ## Setting keys
10
+
11
+ Use the `GCP_CREDENTIALS` environment variable to set the GCP API keys.
12
+
13
+ Like other APIs, you can provide multiple keys separated by commas. Each GCP key, however, is a set of credentials including the project id, client email, region and private key. These are separated by a colon (`:`).
14
+
15
+ For example:
16
+
17
+ ```
18
+ GCP_CREDENTIALS=my-first-project:xxx@yyy.com:us-east5:-----BEGIN PRIVATE KEY-----xxx-----END PRIVATE KEY-----,my-first-project2:xxx2@yyy.com:us-east5:-----BEGIN PRIVATE KEY-----xxx-----END PRIVATE KEY-----
19
+ ```
20
+
21
+ ## Setup Vertex AI
22
+ 1. Go to [https://cloud.google.com/vertex-ai](https://cloud.google.com/vertex-ai) and sign up for a GCP account. ($150 free credits without credit card or $300 free credits with credit card, credits expire in 90 days)
23
+ 2. Go to [https://console.cloud.google.com/marketplace/product/google/aiplatform.googleapis.com](https://console.cloud.google.com/marketplace/product/google/aiplatform.googleapis.com) to enable Vertex AI API.
24
+ 3. Go to [https://console.cloud.google.com/vertex-ai](https://console.cloud.google.com/vertex-ai) and navigate to Model Garden to apply for access to the Claude models.
25
+ 4. Create a [Service Account](https://console.cloud.google.com/projectselector/iam-admin/serviceaccounts/create?walkthrough_id=iam--create-service-account#step_index=1) , and make sure to grant the role of "Vertex AI User" or "Vertex AI Administrator".
26
+ 5. On the service account page you just created, create a new key and select "JSON". The JSON file will be downloaded automatically.
27
+ 6. The required credential is in the JSON file you just downloaded.
28
+
29
+ ## Supported model IDs
30
+ Users can send these model IDs to the proxy to invoke the corresponding models.
31
+ - **Claude**
32
+ - `claude-3-haiku@20240307`
33
+ - `claude-3-sonnet@20240229`
34
+ - `claude-3-opus@20240229`
35
+ - `claude-3-5-sonnet@20240620`
docs/logging-sheets.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Warning
2
+ **I strongly suggest against using this feature with a Google account that you care about.** Depending on the content of the prompts people submit, Google may flag the spreadsheet as containing inappropriate content. This seems to prevent you from sharing that spreadsheet _or any others on the account_. This happened with my throwaway account during testing; the existing shared spreadsheet continues to work but even completely new spreadsheets are flagged and cannot be shared.
3
+
4
+ I'll be looking into alternative storage backends but you should not use this implementation with a Google account you care about, or even one remotely connected to your main accounts (as Google has a history of linking accounts together via IPs/browser fingerprinting). Use a VPN and completely isolated VM to be safe.
5
+
6
+ # Configuring Google Sheets Prompt Logging
7
+ This proxy can log incoming prompts and model responses to Google Sheets. Some configuration on the Google side is required to enable this feature. The APIs used are free, but you will need a Google account and a Google Cloud Platform project.
8
+
9
+ NOTE: Concurrency is not supported. Don't connect two instances of the server to the same spreadsheet or bad things will happen.
10
+
11
+ ## Prerequisites
12
+ - A Google account
13
+ - **USE A THROWAWAY ACCOUNT!**
14
+ - A Google Cloud Platform project
15
+
16
+ ### 0. Create a Google Cloud Platform Project
17
+ _A Google Cloud Platform project is required to enable programmatic access to Google Sheets. If you already have a project, skip to the next step. You can also see the [Google Cloud Platform documentation](https://developers.google.com/workspace/guides/create-project) for more information._
18
+
19
+ - Go to the Google Cloud Platform Console and [create a new project](https://console.cloud.google.com/projectcreate).
20
+
21
+ ### 1. Enable the Google Sheets API
22
+ _The Google Sheets API must be enabled for your project. You can also see the [Google Sheets API documentation](https://developers.google.com/sheets/api/quickstart/nodejs) for more information._
23
+
24
+ - Go to the [Google Sheets API page](https://console.cloud.google.com/apis/library/sheets.googleapis.com) and click **Enable**, then fill in the form to enable the Google Sheets API for your project.
25
+ <!-- TODO: Add screenshot of Enable page and describe filling out the form -->
26
+
27
+ ### 2. Create a Service Account
28
+ _A service account is required to authenticate the proxy to Google Sheets._
29
+
30
+ - Once the Google Sheets API is enabled, click the **Credentials** tab on the Google Sheets API page.
31
+ - Click **Create credentials** and select **Service account**.
32
+ - Provide a name for the service account and click **Done** (the second and third steps can be skipped).
33
+
34
+ ### 3. Download the Service Account Key
35
+ _Once your account is created, you'll need to download the key file and include it in the proxy's secrets configuration._
36
+
37
+ - Click the Service Account you just created in the list of service accounts for the API.
38
+ - Click the **Keys** tab and click **Add key**, then select **Create new key**.
39
+ - Select **JSON** as the key type and click **Create**.
40
+
41
+ The JSON file will be downloaded to your computer.
42
+
43
+ ### 4. Set the Service Account key as a Secret
44
+ _The JSON key file must be set as a secret in the proxy's configuration. Because files cannot be included in the secrets configuration, you'll need to base64 encode the file's contents and paste the encoded string as the value of the `GOOGLE_SHEETS_KEY` secret._
45
+
46
+ - Open the JSON key file in a text editor and copy the contents.
47
+ - Visit the [base64 encode/decode tool](https://www.base64encode.org/) and paste the contents into the box, then click **Encode**.
48
+ - Copy the encoded string and paste it as the value of the `GOOGLE_SHEETS_KEY` secret in the deployment's secrets configuration.
49
+ - **WARNING:** Don't reveal this string publicly. The `.env` file is NOT private -- unless you're running the proxy locally, you should not use it to store secrets!
50
+
51
+ ### 5. Create a new spreadsheet and share it with the service account
52
+ _The service account must be given permission to access the logging spreadsheet. Each service account has a unique email address, which can be found in the JSON key file; share the spreadsheet with that email address just as you would share it with another user._
53
+
54
+ - Open the JSON key file in a text editor and copy the value of the `client_email` field.
55
+ - Open the spreadsheet you want to log to, or create a new one, and click **File > Share**.
56
+ - Paste the service account's email address into the **Add people or groups** field. Ensure the service account has **Editor** permissions, then click **Done**.
57
+
58
+ ### 6. Set the spreadsheet ID as a Secret
59
+ _The spreadsheet ID must be set as a secret in the proxy's configuration. The spreadsheet ID can be found in the URL of the spreadsheet. For example, the spreadsheet ID for `https://docs.google.com/spreadsheets/d/1X2Y3Z/edit#gid=0` is `1X2Y3Z`. The ID isn't necessarily a sensitive value if you intend for the spreadsheet to be public, but it's still recommended to set it as a secret._
60
+
61
+ - Copy the spreadsheet ID and paste it as the value of the `GOOGLE_SHEETS_SPREADSHEET_ID` secret in the deployment's secrets configuration.
docs/pow-captcha.md ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Proof-of-work Verification
2
+
3
+ You can require users to complete a proof-of-work before they can access the
4
+ proxy. This can increase the cost of denial of service attacks and slow down
5
+ automated abuse.
6
+
7
+ When configured, users access the challenge UI and request a token. The server
8
+ sends a challenge to the client, which asks the user's browser to find a
9
+ solution to the challenge that meets a certain constraint (the difficulty
10
+ level). Once the user has found a solution, they can submit it to the server
11
+ and get a user token valid for a period you specify.
12
+
13
+ The proof-of-work challenge uses the argon2id hash function.
14
+
15
+ ## Configuration
16
+
17
+ To enable proof-of-work verification, set the following environment variables:
18
+
19
+ ```
20
+ GATEKEEPER=user_token
21
+ CAPTCHA_MODE=proof_of_work
22
+ # Validity of the token in hours
23
+ POW_TOKEN_HOURS=24
24
+ # Max number of IPs that can use a user_token issued via proof-of-work
25
+ POW_TOKEN_MAX_IPS=2
26
+ # The difficulty level of the proof-of-work challenge. You can use one of the
27
+ # predefined levels specified below, or you can specify a custom number of
28
+ # expected hash iterations.
29
+ POW_DIFFICULTY_LEVEL=low
30
+ # The time limit for solving the challenge, in minutes
31
+ POW_CHALLENGE_TIMEOUT=30
32
+ ```
33
+
34
+ ## Difficulty Levels
35
+
36
+ The difficulty level controls how long, on average, it will take for a user to
37
+ solve the proof-of-work challenge. Due to randomness, the actual time can vary
38
+ significantly; lucky users may solve the challenge in a fraction of the average
39
+ time, while unlucky users may take much longer.
40
+
41
+ The difficulty level doesn't affect the speed of the hash function itself, only
42
+ the number of hashes that will need to be computed. Therefore, the time required
43
+ to complete the challenge scales linearly with the difficulty level's iteration
44
+ count.
45
+
46
+ You can adjust the difficulty level while the proxy is running from the admin
47
+ interface.
48
+
49
+ Be aware that there is a time limit for solving the challenge, by default set to
50
+ 30 minutes. Above 'high' difficulty, you will probably need to increase the time
51
+ limit or it will be very hard for users with slow devices to find a solution
52
+ within the time limit.
53
+
54
+ ### Low
55
+
56
+ - Average of 200 iterations required
57
+ - Default setting.
58
+
59
+ ### Medium
60
+
61
+ - Average of 900 iterations required
62
+
63
+ ### High
64
+
65
+ - Average of 1900 iterations required
66
+
67
+ ### Extreme
68
+
69
+ - Average of 4000 iterations required
70
+ - Not recommended unless you are expecting very high levels of abuse
71
+ - May require increasing `POW_CHALLENGE_TIMEOUT`
72
+
73
+ ### Custom
74
+
75
+ Setting `POW_DIFFICULTY_LEVEL` to an integer will use that number of iterations
76
+ as the difficulty level.
77
+
78
+ ## Other challenge settings
79
+
80
+ - `POW_CHALLENGE_TIMEOUT`: The time limit for solving the challenge, in minutes.
81
+ Default is 30.
82
+ - `POW_TOKEN_HOURS`: The period of time for which a user token issued via proof-
83
+ of-work can be used. Default is 24 hours. Starts when the challenge is solved.
84
+ - `POW_TOKEN_MAX_IPS`: The maximum number of unique IPs that can use a single
85
+ user token issued via proof-of-work. Default is 2.
86
+ - `POW_TOKEN_PURGE_HOURS`: The period of time after which an expired user token
87
+ issued via proof-of-work will be removed from the database. Until it is
88
+ purged, users can refresh expired tokens by completing a half-difficulty
89
+ challenge. Default is 48 hours.
90
+ - `POW_MAX_TOKENS_PER_IP`: The maximum number of active user tokens that can
91
+ be associated with a single IP address. After this limit is reached, the
92
+ oldest token will be forcibly expired when a new token is issued. Set to 0
93
+ to disable this feature. Default is 0.
94
+
95
+ ## Custom argon2id parameters
96
+
97
+ You can set custom argon2id parameters for the proof-of-work challenge.
98
+ Generally, you should not need to change these unless you have a specific
99
+ reason to do so.
100
+
101
+ The listed values are the defaults.
102
+
103
+ ```
104
+ ARGON2_TIME_COST=8
105
+ ARGON2_MEMORY_KB=65536
106
+ ARGON2_PARALLELISM=1
107
+ ARGON2_HASH_LENGTH=32
108
+ ```
109
+
110
+ Increasing parallelism will not do much except increase memory consumption for
111
+ both the client and server, because browser proof-of-work implementations are
112
+ single-threaded. It's better to increase the time cost if you want to increase
113
+ the difficulty.
114
+
115
+ Increasing memory too much may cause memory exhaustion on some mobile devices,
116
+ particularly on iOS due to the way Safari handles WebAssembly memory allocation.
117
+
118
+ ## Tested hash rates
119
+
120
+ These were measured with the default argon2id parameters listed above. These
121
+ tests were not at all scientific so take them with a grain of salt.
122
+
123
+ Safari does not like large WASM memory usage, so concurrency is limited to 4 to
124
+ avoid overallocating memory on mobile WebKit browsers. Thermal throttling can
125
+ also significantly reduce hash rates on mobile devices.
126
+
127
+ - Intel Core i9-13900K (Chrome): 33-35 H/s
128
+ - Intel Core i9-13900K (Firefox): 29-32 H/s
129
+ - Intel Core i9-13900K (Chrome, in VM limited to 4 cores): 12.2 - 13.0 H/s
130
+ - iPad Pro (M2) (Safari, 6 workers): 8.0 - 10 H/s
131
+ - Thermal throttles early. 8 cores is normal concurrency, but unstable.
132
+ - iPhone 15 Pro Max (Safari): 4.0 - 4.6 H/s
133
+ - Samsung Galaxy S10e (Chrome): 3.6 - 3.8 H/s
134
+ - This is a 2019 phone almost matching an iPhone five years newer because of
135
+ bad Safari performance.
docs/self-hosting.md ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Quick self-hosting guide
2
+
3
+ Temporary guide for self-hosting. This will be improved in the future to provide more robust instructions and options. Provided commands are for Ubuntu.
4
+
5
+ This uses prebuilt Docker images for convenience. If you want to make adjustments to the code you can instead clone the repo and follow the Local Development guide in the [README](../README.md).
6
+
7
+ ## Table of Contents
8
+ - [Requirements](#requirements)
9
+ - [Running the application](#running-the-application)
10
+ - [Setting up a reverse proxy](#setting-up-a-reverse-proxy)
11
+ - [trycloudflare](#trycloudflare)
12
+ - [nginx](#nginx)
13
+ - [Example basic nginx configuration (no SSL)](#example-basic-nginx-configuration-no-ssl)
14
+ - [Example with Cloudflare SSL](#example-with-cloudflare-ssl)
15
+ - [Updating/Restarting the application](#updatingrestarting-the-application)
16
+
17
+ ## Requirements
18
+
19
+ - Docker
20
+ - Docker Compose
21
+ - A VPS with at least 512MB of RAM (1GB recommended)
22
+ - A domain name
23
+
24
+ If you don't have a VPS and domain name you can use TryCloudflare to set up a temporary URL that you can share with others. See [trycloudflare](#trycloudflare) for more information.
25
+
26
+ ## Running the application
27
+
28
+ - Install Docker and Docker Compose
29
+ - Create a new directory for the application
30
+ - This will contain your .env file, greeting file, and any user-generated files
31
+ - Execute the following commands:
32
+ - ```
33
+ touch .env
34
+ touch greeting.md
35
+ echo "OPENAI_KEY=your-openai-key" >> .env
36
+ curl https://gitgud.io/khanon/oai-reverse-proxy/-/raw/main/docker/docker-compose-selfhost.yml -o docker-compose.yml
37
+ ```
38
+ - You can set further environment variables and keys in the `.env` file. See [.env.example](../.env.example) for a list of available options.
39
+ - You can set a custom greeting in `greeting.md`. This will be displayed on the homepage.
40
+ - Run `docker compose up -d`
41
+
42
+ You can check logs with `docker compose logs -n 100 -f`.
43
+
44
+ The provided docker-compose file listens on port 7860 but binds to localhost only. You should use a reverse proxy to expose the application to the internet as described in the next section.
45
+
46
+ ## Setting up a reverse proxy
47
+
48
+ Rather than exposing the application directly to the internet, it is recommended to set up a reverse proxy. This will allow you to use HTTPS and add additional security measures.
49
+
50
+ ### trycloudflare
51
+
52
+ This will give you a temporary (72 hours) URL that you can use to let others connect to your instance securely, without having to set up a reverse proxy. If you are running the server on your home network, this is probably the best option.
53
+ - Install `cloudflared` following the instructions at [try.cloudflare.com](https://try.cloudflare.com/).
54
+ - Run `cloudflared tunnel --url http://localhost:7860`
55
+ - You will be given a temporary URL that you can share with others.
56
+
57
+ If you have a VPS, you should use a proper reverse proxy like nginx instead for a more permanent solution which will allow you to use your own domain name, handle SSL, and add additional security/anti-abuse measures.
58
+
59
+ ### nginx
60
+
61
+ First, install nginx.
62
+ - `sudo apt update && sudo apt install nginx`
63
+
64
+ #### Example basic nginx configuration (no SSL)
65
+
66
+ - `sudo nano /etc/nginx/sites-available/oai.conf`
67
+ - ```
68
+ server {
69
+ listen 80;
70
+ server_name example.com;
71
+
72
+ location / {
73
+ proxy_pass http://localhost:7860;
74
+ }
75
+ }
76
+ ```
77
+ - Replace `example.com` with your domain name.
78
+ - Ctrl+X to exit, Y to save, Enter to confirm.
79
+ - `sudo ln -s /etc/nginx/sites-available/oai.conf /etc/nginx/sites-enabled`
80
+ - `sudo nginx -t`
81
+ - This will check the configuration file for errors.
82
+ - `sudo systemctl restart nginx`
83
+ - This will restart nginx and apply the new configuration.
84
+
85
+ #### Example with Cloudflare SSL
86
+
87
+ This allows you to use a self-signed certificate on the server, and have Cloudflare handle client SSL. You need to have a Cloudflare account and have your domain set up with Cloudflare already, pointing to your server's IP address.
88
+
89
+ - Set Cloudflare to use Full SSL mode. Since we are using a self-signed certificate, don't use Full (strict) mode.
90
+ - Create a self-signed certificate:
91
+ - `openssl req -x509 -nodes -days 365 -newkey rsa:2048 -keyout /etc/ssl/private/nginx-selfsigned.key -out /etc/ssl/certs/nginx-selfsigned.crt`
92
+ - `sudo nano /etc/nginx/sites-available/oai.conf`
93
+ - ```
94
+ server {
95
+ listen 443 ssl;
96
+ server_name yourdomain.com www.yourdomain.com;
97
+
98
+ ssl_certificate /etc/ssl/certs/nginx-selfsigned.crt;
99
+ ssl_certificate_key /etc/ssl/private/nginx-selfsigned.key;
100
+
101
+ # Only allow inbound traffic from Cloudflare
102
+ allow 173.245.48.0/20;
103
+ allow 103.21.244.0/22;
104
+ allow 103.22.200.0/22;
105
+ allow 103.31.4.0/22;
106
+ allow 141.101.64.0/18;
107
+ allow 108.162.192.0/18;
108
+ allow 190.93.240.0/20;
109
+ allow 188.114.96.0/20;
110
+ allow 197.234.240.0/22;
111
+ allow 198.41.128.0/17;
112
+ allow 162.158.0.0/15;
113
+ allow 104.16.0.0/13;
114
+ allow 104.24.0.0/14;
115
+ allow 172.64.0.0/13;
116
+ allow 131.0.72.0/22;
117
+ deny all;
118
+
119
+ location / {
120
+ proxy_pass http://localhost:7860;
121
+ proxy_http_version 1.1;
122
+ proxy_set_header Upgrade $http_upgrade;
123
+ proxy_set_header Connection 'upgrade';
124
+ proxy_set_header Host $host;
125
+ proxy_cache_bypass $http_upgrade;
126
+ }
127
+
128
+ ssl_protocols TLSv1.2 TLSv1.3;
129
+ ssl_ciphers 'ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256';
130
+ ssl_prefer_server_ciphers on;
131
+ ssl_session_cache shared:SSL:10m;
132
+ }
133
+ ```
134
+ - Replace `yourdomain.com` with your domain name.
135
+ - Ctrl+X to exit, Y to save, Enter to confirm.
136
+ - `sudo ln -s /etc/nginx/sites-available/oai.conf /etc/nginx/sites-enabled`
137
+
138
+ ## Updating/Restarting the application
139
+
140
+ After making an .env change, you need to restart the application for it to take effect.
141
+
142
+ - `docker compose down`
143
+ - `docker compose up -d`
144
+
145
+ To update the application to the latest version:
146
+
147
+ - `docker compose pull`
148
+ - `docker compose down`
149
+ - `docker compose up -d`
150
+ - `docker image prune -f`
docs/user-management.md ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # User Management
2
+
3
+ The proxy supports several different user management strategies. You can choose the one that best fits your needs by setting the `GATEKEEPER` environment variable.
4
+
5
+ Several of these features require you to set secrets in your environment. If using Huggingface Spaces to deploy, do not set these in your `.env` file because that file is public and anyone can see it.
6
+
7
+ ## Table of Contents
8
+
9
+ - [No user management](#no-user-management-gatekeepernone)
10
+ - [Single-password authentication](#single-password-authentication-gatekeeperproxy_key)
11
+ - [Per-user authentication](#per-user-authentication-gatekeeperuser_token)
12
+ - [Memory](#memory)
13
+ - [Firebase Realtime Database](#firebase-realtime-database)
14
+ - [Firebase setup instructions](#firebase-setup-instructions)
15
+ - [SQLite Database](#sqlite-database)
16
+ - [Whitelisting admin IP addresses](#whitelisting-admin-ip-addresses)
17
+
18
+ ## No user management (`GATEKEEPER=none`)
19
+
20
+ This is the default mode. The proxy will not require any authentication to access the server and offers basic IP-based rate limiting and anti-abuse features.
21
+
22
+ ## Single-password authentication (`GATEKEEPER=proxy_key`)
23
+
24
+ This mode allows you to set a password that must be passed in the `Authentication` header of every request to the server as a bearer token. This is useful if you want to restrict access to the server, but don't want to create a separate account for every user.
25
+
26
+ To set the password, create a `PROXY_KEY` secret in your environment.
27
+
28
+ ## Per-user authentication (`GATEKEEPER=user_token`)
29
+
30
+ This mode allows you to provision separate Bearer tokens for each user. You can manage users via the `/admin/users` REST API or through the admin interface at `/admin`.
31
+
32
+ To begin, set `ADMIN_KEY` to a secret value. This will be used to authenticate requests to the REST API or to log in to the UI.
33
+
34
+ [You can find an OpenAPI specification for the /admin/users REST API here.](openapi-admin-users.yaml)
35
+
36
+ By default, the proxy will store user data in memory. Naturally, this means that user data will be lost when the proxy is restarted, though you can use the user import/export feature to save and restore user data manually or via a script. However, the proxy also supports persisting user data to an external data store with some additional configuration.
37
+
38
+ Below are the supported data stores and their configuration options.
39
+
40
+ ### Memory
41
+
42
+ This is the default data store (`GATEKEEPER_STORE=memory`). User data will be stored in memory and will be lost when the server is restarted. You are responsible for exporting and re-importing user data after a restart.
43
+
44
+ ### Firebase Realtime Database
45
+
46
+ To use Firebase Realtime Database to persist user data, set the following environment variables:
47
+
48
+ - `GATEKEEPER_STORE`: Set this to `firebase_rtdb`
49
+ - **Secret** `FIREBASE_RTDB_URL`: The URL of your Firebase Realtime Database, e.g. `https://my-project-default-rtdb.firebaseio.com`
50
+ - **Secret** `FIREBASE_KEY`: A base-64 encoded service account key for your Firebase project. Refer to the instructions below for how to create this key.
51
+
52
+ **Firebase setup instructions**
53
+
54
+ 1. Go to the [Firebase console](https://console.firebase.google.com/) and click "Add project", then follow the prompts to create a new project.
55
+ 2. From the **Project Overview** page, click **All products** in the left sidebar, then click **Realtime Database**.
56
+ 3. Click **Create database** and choose **Start in test mode**. Click **Enable**.
57
+ - Test mode is fine for this use case as it still requires authentication to access the database. You may wish to set up more restrictive rules if you plan to use the database for other purposes.
58
+ - The reference URL for the database will be displayed on the page. You will need this later.
59
+ 4. Click the gear icon next to **Project Overview** in the left sidebar, then click **Project settings**.
60
+ 5. Click the **Service accounts** tab, then click **Generate new private key**.
61
+ 6. The downloaded file contains your key. Encode it as base64 and set it as the `FIREBASE_KEY` secret in your environment.
62
+ 7. Set `FIREBASE_RTDB_URL` to the reference URL of your Firebase Realtime Database, e.g. `https://my-project-default-rtdb.firebaseio.com`.
63
+ 8. Set `GATEKEEPER_STORE` to `firebase_rtdb` in your environment if you haven't already.
64
+
65
+ The proxy server will attempt to connect to your Firebase Realtime Database at startup and will throw an error if it cannot connect. If you see this error, check that your `FIREBASE_RTDB_URL` and `FIREBASE_KEY` secrets are set correctly.
66
+
67
+ ### SQLite Database
68
+
69
+ To use a local SQLite database file to persist user data, set the following environment variables:
70
+
71
+ - `GATEKEEPER_STORE`: Set this to `sqlite`.
72
+ - `SQLITE_USER_STORE_PATH` (Optional): Specifies the path to the SQLite database file.
73
+ - If not set, it defaults to `data/user-store.sqlite` within the project directory.
74
+ - Ensure that the directory where the SQLite file will be created (e.g., the `data/` directory) is writable by the application process.
75
+
76
+ Using SQLite provides a simple way to persist user data locally without relying on external services. User data will be saved to the specified file and will be available across server restarts.
77
+
78
+ ## Whitelisting admin IP addresses
79
+ You can add your own IP ranges to the `ADMIN_WHITELIST` environment variable for additional security.
80
+
81
+ You can provide a comma-separated list containing individual IPv4 or IPv6 addresses, or CIDR ranges.
82
+
83
+ To whitelist an entire IP range, use CIDR notation. For example, `192.168.0.1/24` would whitelist all addresses from `192.168.0.0` to `192.168.0.255`.
84
+
85
+ To disable the whitelist, set `ADMIN_WHITELIST=0.0.0.0/0,::0`, which will allow access from any IPv4 or IPv6 address. This is the default behavior.
docs/user-quotas.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # User Quotas
2
+
3
+ When using `user_token` authentication, you can set per-model token quotas for each user. These quotas are enforced by the proxy server and are separate from the quotas enforced by OpenAI.
4
+
5
+ You can set the default quota via environment variables. Quotas are enforced on a per-model basis, and count both prompt tokens and completion tokens. By default, all quotas are disabled.
6
+
7
+ Set the following environment variables to set the default quotas:
8
+ - `TOKEN_QUOTA_TURBO`
9
+ - `TOKEN_QUOTA_GPT4`
10
+ - `TOKEN_QUOTA_CLAUDE`
11
+
12
+ Quotas only apply to `normal`-type users; `special`-type users are exempt from quotas. You can change users' types via the REST API.
13
+
14
+ **Note that changes to these environment variables will only apply to newly created users.** To modify existing users' quotas, use the REST API or the admin UI.
15
+
16
+ ## Automatically refreshing quotas
17
+
18
+ You can use the `QUOTA_REFRESH_PERIOD` environment variable to automatically refresh users' quotas periodically. This is useful if you want to give users a certain number of tokens per day, for example. The entire quota will be refreshed at the start of the specified period, and any tokens a user has not used will not be carried over.
19
+
20
+ Quotas for all models and users will be refreshed. If you haven't set `TOKEN_QUOTA_*` for a particular model, quotas for that model will not be refreshed (so any manually set quotas will not be overwritten).
21
+
22
+ Set the `QUOTA_REFRESH_PERIOD` environment variable to one of the following values:
23
+ - `daily` (at midnight)
24
+ - `hourly`
25
+ - leave unset to disable automatic refreshing
26
+
27
+ You can also use a cron expression, for example:
28
+ - Every 45 seconds: `"*/45 * * * * *"`
29
+ - Every 30 minutes: `"*/30 * * * *"`
30
+ - Every 6 hours: `"0 */6 * * *"`
31
+ - Every 3 days: `"0 0 */3 * *"`
32
+ - Daily, but at mid-day: `"0 12 * * *"`
33
+
34
+ Make sure to enclose the cron expression in quotation marks.
35
+
36
+ All times are in the server's local time zone. Refer to [crontab.guru](https://crontab.guru/) for more examples.
http-client.env.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dev": {
3
+ "proxy-host": "http://localhost:7860",
4
+ "oai-key-1": "override in http-client.private.env.json",
5
+ "proxy-key": "override in http-client.private.env.json",
6
+ "azu-resource-name": "override in http-client.private.env.json",
7
+ "azu-deployment-id": "override in http-client.private.env.json"
8
+ }
9
+ }
package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
package.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "oai-reverse-proxy",
3
+ "version": "1.0.0",
4
+ "description": "Reverse proxy for the OpenAI API",
5
+ "scripts": {
6
+ "build": "tsc && copyfiles -u 1 src/**/*.ejs build",
7
+ "database:migrate": "ts-node scripts/migrate.ts",
8
+ "postinstall": "patch-package",
9
+ "prepare": "husky install",
10
+ "start": "node --trace-deprecation --trace-warnings build/server.js",
11
+ "start:dev": "nodemon --watch src --exec ts-node --transpile-only src/server.ts",
12
+ "start:debug": "ts-node --inspect --transpile-only src/server.ts",
13
+ "start:watch": "nodemon --require source-map-support/register build/server.js",
14
+ "type-check": "tsc --noEmit"
15
+ },
16
+ "engines": {
17
+ "node": ">=18.0.0"
18
+ },
19
+ "author": "",
20
+ "license": "MIT",
21
+ "dependencies": {
22
+ "@anthropic-ai/tokenizer": "^0.0.4",
23
+ "@aws-crypto/sha256-js": "^5.2.0",
24
+ "@huggingface/jinja": "^0.3.0",
25
+ "@node-rs/argon2": "^1.8.3",
26
+ "@smithy/eventstream-codec": "^2.1.3",
27
+ "@smithy/eventstream-serde-node": "^2.1.3",
28
+ "@smithy/protocol-http": "^3.2.1",
29
+ "@smithy/signature-v4": "^2.1.3",
30
+ "@smithy/util-utf8": "^2.1.1",
31
+ "axios": "^1.7.4",
32
+ "better-sqlite3": "^10.0.0",
33
+ "check-disk-space": "^3.4.0",
34
+ "cookie-parser": "^1.4.6",
35
+ "copyfiles": "^2.4.1",
36
+ "cors": "^2.8.5",
37
+ "csrf-csrf": "^2.3.0",
38
+ "dotenv": "^16.3.1",
39
+ "ejs": "^3.1.10",
40
+ "express": "^4.19.3",
41
+ "express-session": "^1.17.3",
42
+ "firebase-admin": "^12.5.0",
43
+ "glob": "^10.3.12",
44
+ "googleapis": "^122.0.0",
45
+ "http-proxy": "1.18.1",
46
+ "http-proxy-middleware": "^3.0.2",
47
+ "ipaddr.js": "^2.1.0",
48
+ "memorystore": "^1.6.7",
49
+ "multer": "^1.4.5-lts.1",
50
+ "node-schedule": "^2.1.1",
51
+ "patch-package": "^8.0.0",
52
+ "pino": "^8.11.0",
53
+ "pino-http": "^8.3.3",
54
+ "proxy-agent": "^6.4.0",
55
+ "sanitize-html": "^2.13.0",
56
+ "sharp": "^0.32.6",
57
+ "showdown": "^2.1.0",
58
+ "source-map-support": "^0.5.21",
59
+ "stream-json": "^1.8.0",
60
+ "tiktoken": "^1.0.10",
61
+ "tinyws": "^0.1.0",
62
+ "uuid": "^9.0.0",
63
+ "zlib": "^1.0.5",
64
+ "zod": "^3.22.3",
65
+ "zod-error": "^1.5.0"
66
+ },
67
+ "devDependencies": {
68
+ "@smithy/types": "^3.3.0",
69
+ "@types/better-sqlite3": "^7.6.10",
70
+ "@types/cookie-parser": "^1.4.3",
71
+ "@types/cors": "^2.8.13",
72
+ "@types/express": "^4.17.17",
73
+ "@types/express-session": "^1.17.7",
74
+ "@types/multer": "^1.4.7",
75
+ "@types/node-schedule": "^2.1.0",
76
+ "@types/sanitize-html": "^2.9.0",
77
+ "@types/showdown": "^2.0.0",
78
+ "@types/stream-json": "^1.7.7",
79
+ "@types/uuid": "^9.0.1",
80
+ "concurrently": "^8.0.1",
81
+ "esbuild": "^0.25.5",
82
+ "esbuild-register": "^3.4.2",
83
+ "husky": "^8.0.3",
84
+ "nodemon": "^3.0.1",
85
+ "pino-pretty": "^10.2.3",
86
+ "prettier": "^3.0.3",
87
+ "prettier-plugin-ejs": "^1.0.3",
88
+ "ts-node": "^10.9.1",
89
+ "typescript": "^5.4.2"
90
+ },
91
+ "overrides": {
92
+ "node-fetch@2.x": {
93
+ "whatwg-url": "14.x"
94
+ }
95
+ }
96
+ }
patches/README.md ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Patches
2
+ Contains monkey patches for certain packages, applied using `patch-package`.
3
+
4
+ ## `http-proxy+1.18.1.patch`
5
+ Modifies the `http-proxy` package to work around an incompatibility with
6
+ body-parser and SOCKS5 proxies due to some esoteric stream handling behavior
7
+ when `socks-proxy-agent` is used instead of a generic http.Agent.
8
+
9
+ Modification involves adjusting the `buffer` property on ProxyServer's `options`
10
+ object to be a function that returns a stream instead of a stream itself. This
11
+ allows us to give it a function which produces a new Readable from the already-
12
+ parsed request body.
13
+
14
+ With the old implementation we would need to create an entirely new ProxyServer
15
+ instance for each request, which is not ideal under heavy load.
16
+
17
+ `http-proxy` hasn't been updated in six years so it's unlikely that this patch
18
+ will be broken by future updates, but it's stil pinned to 1.18.1 for now.
19
+
20
+ ### See also
21
+ https://github.com/chimurai/http-proxy-middleware/issues/40
22
+ https://github.com/chimurai/http-proxy-middleware/issues/299
23
+ https://github.com/http-party/node-http-proxy/pull/1027
patches/http-proxy+1.18.1.patch ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/node_modules/http-proxy/lib/http-proxy/passes/web-incoming.js b/node_modules/http-proxy/lib/http-proxy/passes/web-incoming.js
2
+ index 7ae7355..c825c27 100644
3
+ --- a/node_modules/http-proxy/lib/http-proxy/passes/web-incoming.js
4
+ +++ b/node_modules/http-proxy/lib/http-proxy/passes/web-incoming.js
5
+ @@ -167,7 +167,7 @@ module.exports = {
6
+ }
7
+ }
8
+
9
+ - (options.buffer || req).pipe(proxyReq);
10
+ + (options.buffer(req) || req).pipe(proxyReq);
11
+
12
+ proxyReq.on('response', function(proxyRes) {
13
+ if(server) { server.emit('proxyRes', proxyRes, req, res); }
public/css/reset.css ADDED
@@ -0,0 +1,349 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */
2
+
3
+ /* Document
4
+ ========================================================================== */
5
+
6
+ /**
7
+ * 1. Correct the line height in all browsers.
8
+ * 2. Prevent adjustments of font size after orientation changes in iOS.
9
+ */
10
+
11
+ html {
12
+ line-height: 1.15; /* 1 */
13
+ -webkit-text-size-adjust: 100%; /* 2 */
14
+ }
15
+
16
+ /* Sections
17
+ ========================================================================== */
18
+
19
+ /**
20
+ * Remove the margin in all browsers.
21
+ */
22
+
23
+ body {
24
+ margin: 0;
25
+ }
26
+
27
+ /**
28
+ * Render the `main` element consistently in IE.
29
+ */
30
+
31
+ main {
32
+ display: block;
33
+ }
34
+
35
+ /**
36
+ * Correct the font size and margin on `h1` elements within `section` and
37
+ * `article` contexts in Chrome, Firefox, and Safari.
38
+ */
39
+
40
+ h1 {
41
+ font-size: 2em;
42
+ margin: 0.67em 0;
43
+ }
44
+
45
+ /* Grouping content
46
+ ========================================================================== */
47
+
48
+ /**
49
+ * 1. Add the correct box sizing in Firefox.
50
+ * 2. Show the overflow in Edge and IE.
51
+ */
52
+
53
+ hr {
54
+ box-sizing: content-box; /* 1 */
55
+ height: 0; /* 1 */
56
+ overflow: visible; /* 2 */
57
+ }
58
+
59
+ /**
60
+ * 1. Correct the inheritance and scaling of font size in all browsers.
61
+ * 2. Correct the odd `em` font sizing in all browsers.
62
+ */
63
+
64
+ pre {
65
+ font-family: monospace, monospace; /* 1 */
66
+ font-size: 1em; /* 2 */
67
+ }
68
+
69
+ /* Text-level semantics
70
+ ========================================================================== */
71
+
72
+ /**
73
+ * Remove the gray background on active links in IE 10.
74
+ */
75
+
76
+ a {
77
+ background-color: transparent;
78
+ }
79
+
80
+ /**
81
+ * 1. Remove the bottom border in Chrome 57-
82
+ * 2. Add the correct text decoration in Chrome, Edge, IE, Opera, and Safari.
83
+ */
84
+
85
+ abbr[title] {
86
+ border-bottom: none; /* 1 */
87
+ text-decoration: underline; /* 2 */
88
+ text-decoration: underline dotted; /* 2 */
89
+ }
90
+
91
+ /**
92
+ * Add the correct font weight in Chrome, Edge, and Safari.
93
+ */
94
+
95
+ b,
96
+ strong {
97
+ font-weight: bolder;
98
+ }
99
+
100
+ /**
101
+ * 1. Correct the inheritance and scaling of font size in all browsers.
102
+ * 2. Correct the odd `em` font sizing in all browsers.
103
+ */
104
+
105
+ code,
106
+ kbd,
107
+ samp {
108
+ font-family: monospace, monospace; /* 1 */
109
+ font-size: 1em; /* 2 */
110
+ }
111
+
112
+ /**
113
+ * Add the correct font size in all browsers.
114
+ */
115
+
116
+ small {
117
+ font-size: 80%;
118
+ }
119
+
120
+ /**
121
+ * Prevent `sub` and `sup` elements from affecting the line height in
122
+ * all browsers.
123
+ */
124
+
125
+ sub,
126
+ sup {
127
+ font-size: 75%;
128
+ line-height: 0;
129
+ position: relative;
130
+ vertical-align: baseline;
131
+ }
132
+
133
+ sub {
134
+ bottom: -0.25em;
135
+ }
136
+
137
+ sup {
138
+ top: -0.5em;
139
+ }
140
+
141
+ /* Embedded content
142
+ ========================================================================== */
143
+
144
+ /**
145
+ * Remove the border on images inside links in IE 10.
146
+ */
147
+
148
+ img {
149
+ border-style: none;
150
+ }
151
+
152
+ /* Forms
153
+ ========================================================================== */
154
+
155
+ /**
156
+ * 1. Change the font styles in all browsers.
157
+ * 2. Remove the margin in Firefox and Safari.
158
+ */
159
+
160
+ button,
161
+ input,
162
+ optgroup,
163
+ select,
164
+ textarea {
165
+ font-family: inherit; /* 1 */
166
+ font-size: 100%; /* 1 */
167
+ line-height: 1.15; /* 1 */
168
+ margin: 0; /* 2 */
169
+ }
170
+
171
+ /**
172
+ * Show the overflow in IE.
173
+ * 1. Show the overflow in Edge.
174
+ */
175
+
176
+ button,
177
+ input { /* 1 */
178
+ overflow: visible;
179
+ }
180
+
181
+ /**
182
+ * Remove the inheritance of text transform in Edge, Firefox, and IE.
183
+ * 1. Remove the inheritance of text transform in Firefox.
184
+ */
185
+
186
+ button,
187
+ select { /* 1 */
188
+ text-transform: none;
189
+ }
190
+
191
+ /**
192
+ * Correct the inability to style clickable types in iOS and Safari.
193
+ */
194
+
195
+ button,
196
+ [type="button"],
197
+ [type="reset"],
198
+ [type="submit"] {
199
+ -webkit-appearance: button;
200
+ }
201
+
202
+ /**
203
+ * Remove the inner border and padding in Firefox.
204
+ */
205
+
206
+ button::-moz-focus-inner,
207
+ [type="button"]::-moz-focus-inner,
208
+ [type="reset"]::-moz-focus-inner,
209
+ [type="submit"]::-moz-focus-inner {
210
+ border-style: none;
211
+ padding: 0;
212
+ }
213
+
214
+ /**
215
+ * Restore the focus styles unset by the previous rule.
216
+ */
217
+
218
+ button:-moz-focusring,
219
+ [type="button"]:-moz-focusring,
220
+ [type="reset"]:-moz-focusring,
221
+ [type="submit"]:-moz-focusring {
222
+ outline: 1px dotted ButtonText;
223
+ }
224
+
225
+ /**
226
+ * Correct the padding in Firefox.
227
+ */
228
+
229
+ fieldset {
230
+ padding: 0.35em 0.75em 0.625em;
231
+ }
232
+
233
+ /**
234
+ * 1. Correct the text wrapping in Edge and IE.
235
+ * 2. Correct the color inheritance from `fieldset` elements in IE.
236
+ * 3. Remove the padding so developers are not caught out when they zero out
237
+ * `fieldset` elements in all browsers.
238
+ */
239
+
240
+ legend {
241
+ box-sizing: border-box; /* 1 */
242
+ color: inherit; /* 2 */
243
+ display: table; /* 1 */
244
+ max-width: 100%; /* 1 */
245
+ padding: 0; /* 3 */
246
+ white-space: normal; /* 1 */
247
+ }
248
+
249
+ /**
250
+ * Add the correct vertical alignment in Chrome, Firefox, and Opera.
251
+ */
252
+
253
+ progress {
254
+ vertical-align: baseline;
255
+ }
256
+
257
+ /**
258
+ * Remove the default vertical scrollbar in IE 10+.
259
+ */
260
+
261
+ textarea {
262
+ overflow: auto;
263
+ }
264
+
265
+ /**
266
+ * 1. Add the correct box sizing in IE 10.
267
+ * 2. Remove the padding in IE 10.
268
+ */
269
+
270
+ [type="checkbox"],
271
+ [type="radio"] {
272
+ box-sizing: border-box; /* 1 */
273
+ padding: 0; /* 2 */
274
+ }
275
+
276
+ /**
277
+ * Correct the cursor style of increment and decrement buttons in Chrome.
278
+ */
279
+
280
+ [type="number"]::-webkit-inner-spin-button,
281
+ [type="number"]::-webkit-outer-spin-button {
282
+ height: auto;
283
+ }
284
+
285
+ /**
286
+ * 1. Correct the odd appearance in Chrome and Safari.
287
+ * 2. Correct the outline style in Safari.
288
+ */
289
+
290
+ [type="search"] {
291
+ -webkit-appearance: textfield; /* 1 */
292
+ outline-offset: -2px; /* 2 */
293
+ }
294
+
295
+ /**
296
+ * Remove the inner padding in Chrome and Safari on macOS.
297
+ */
298
+
299
+ [type="search"]::-webkit-search-decoration {
300
+ -webkit-appearance: none;
301
+ }
302
+
303
+ /**
304
+ * 1. Correct the inability to style clickable types in iOS and Safari.
305
+ * 2. Change font properties to `inherit` in Safari.
306
+ */
307
+
308
+ ::-webkit-file-upload-button {
309
+ -webkit-appearance: button; /* 1 */
310
+ font: inherit; /* 2 */
311
+ }
312
+
313
+ /* Interactive
314
+ ========================================================================== */
315
+
316
+ /*
317
+ * Add the correct display in Edge, IE 10+, and Firefox.
318
+ */
319
+
320
+ details {
321
+ display: block;
322
+ }
323
+
324
+ /*
325
+ * Add the correct display in all browsers.
326
+ */
327
+
328
+ summary {
329
+ display: list-item;
330
+ }
331
+
332
+ /* Misc
333
+ ========================================================================== */
334
+
335
+ /**
336
+ * Add the correct display in IE 10+.
337
+ */
338
+
339
+ template {
340
+ display: none;
341
+ }
342
+
343
+ /**
344
+ * Add the correct display in IE 10.
345
+ */
346
+
347
+ [hidden] {
348
+ display: none;
349
+ }
public/css/sakura-dark.css ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* modified https://github.com/oxalorg/sakura */
2
+ html {
3
+ font-size: 62.5%;
4
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
5
+ "Helvetica Neue", Arial, "Noto Sans", sans-serif;
6
+ }
7
+ body {
8
+ font-size: 1.8rem;
9
+ line-height: 1.618;
10
+ max-width: 38em;
11
+ margin: auto;
12
+ color: #c9c9c9;
13
+ background-color: #222222;
14
+ padding: 13px;
15
+ }
16
+ @media (max-width: 684px) {
17
+ body {
18
+ font-size: 1.53rem;
19
+ }
20
+ }
21
+ @media (max-width: 382px) {
22
+ body {
23
+ font-size: 1.35rem;
24
+ }
25
+ }
26
+ h1,
27
+ h2,
28
+ h3,
29
+ h4,
30
+ h5,
31
+ h6 {
32
+ line-height: 1.1;
33
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
34
+ "Helvetica Neue", Arial, "Noto Sans", sans-serif;
35
+ font-weight: 700;
36
+ margin-top: 3rem;
37
+ margin-bottom: 1.5rem;
38
+ overflow-wrap: break-word;
39
+ word-wrap: break-word;
40
+ -ms-word-break: break-all;
41
+ word-break: break-word;
42
+ }
43
+ h1 {
44
+ font-size: 2.35em;
45
+ }
46
+ h2 {
47
+ font-size: 2em;
48
+ }
49
+ h3 {
50
+ font-size: 1.75em;
51
+ }
52
+ h4 {
53
+ font-size: 1.5em;
54
+ }
55
+ h5 {
56
+ font-size: 1.25em;
57
+ }
58
+ h6 {
59
+ font-size: 1em;
60
+ }
61
+ p {
62
+ margin-top: 0px;
63
+ margin-bottom: 2.5rem;
64
+ }
65
+ small,
66
+ sub,
67
+ sup {
68
+ font-size: 75%;
69
+ }
70
+ hr {
71
+ border-color: #ffffff;
72
+ }
73
+ a {
74
+ text-decoration: none;
75
+ color: #ffffff;
76
+ }
77
+ a:visited {
78
+ color: #e6e6e6;
79
+ }
80
+ a:hover {
81
+ color: #c9c9c9;
82
+ text-decoration: underline;
83
+ }
84
+ ul {
85
+ padding-left: 1.4em;
86
+ margin-top: 0px;
87
+ margin-bottom: 2.5rem;
88
+ }
89
+ li {
90
+ margin-bottom: 0.4em;
91
+ }
92
+ blockquote {
93
+ margin-left: 0px;
94
+ margin-right: 0px;
95
+ padding-left: 1em;
96
+ padding-top: 0.8em;
97
+ padding-bottom: 0.8em;
98
+ padding-right: 0.8em;
99
+ border-left: 5px solid #ffffff;
100
+ margin-bottom: 2.5rem;
101
+ background-color: #4a4a4a;
102
+ }
103
+ blockquote p {
104
+ margin-bottom: 0;
105
+ }
106
+ img,
107
+ video {
108
+ height: auto;
109
+ max-width: 100%;
110
+ margin-top: 0px;
111
+ margin-bottom: 2.5rem;
112
+ }
113
+ pre {
114
+ background-color: #4a4a4a;
115
+ display: block;
116
+ padding: 1em;
117
+ overflow-x: auto;
118
+ margin-top: 0px;
119
+ margin-bottom: 2.5rem;
120
+ font-size: 0.9em;
121
+ }
122
+ code,
123
+ kbd,
124
+ samp {
125
+ font-size: 0.9em;
126
+ padding: 0 0.5em;
127
+ background-color: #4a4a4a;
128
+ white-space: pre-wrap;
129
+ }
130
+ pre > code {
131
+ padding: 0;
132
+ background-color: transparent;
133
+ white-space: pre;
134
+ font-size: 1em;
135
+ }
136
+ table {
137
+ text-align: justify;
138
+ width: 100%;
139
+ border-collapse: collapse;
140
+ margin-bottom: 2rem;
141
+ }
142
+ td,
143
+ th {
144
+ padding: 0.5em;
145
+ border-bottom: 1px solid #4a4a4a;
146
+ }
147
+ input,
148
+ textarea {
149
+ border: 1px solid #c9c9c9;
150
+ }
151
+ input:focus,
152
+ textarea:focus {
153
+ border: 1px solid #ffffff;
154
+ }
155
+ textarea {
156
+ width: 100%;
157
+ }
158
+ .button,
159
+ button,
160
+ input[type="submit"],
161
+ input[type="reset"],
162
+ input[type="button"],
163
+ input[type="file"]::file-selector-button {
164
+ display: inline-block;
165
+ padding: 5px 10px;
166
+ text-align: center;
167
+ text-decoration: none;
168
+ white-space: nowrap;
169
+ background-color: #ffffff;
170
+ color: #222222;
171
+ border-radius: 1px;
172
+ border: 1px solid #ffffff;
173
+ cursor: pointer;
174
+ box-sizing: border-box;
175
+ }
176
+ .button[disabled],
177
+ button[disabled],
178
+ input[type="submit"][disabled],
179
+ input[type="reset"][disabled],
180
+ input[type="button"][disabled],
181
+ input[type="file"][disabled] {
182
+ cursor: default;
183
+ opacity: 0.5;
184
+ }
185
+ .button:hover,
186
+ button:hover,
187
+ input[type="submit"]:hover,
188
+ input[type="reset"]:hover,
189
+ input[type="button"]:hover,
190
+ input[type="file"]::file-selector-button:hover {
191
+ background-color: #c9c9c9;
192
+ color: #222222;
193
+ outline: 0;
194
+ }
195
+ .button:focus-visible,
196
+ button:focus-visible,
197
+ input[type="submit"]:focus-visible,
198
+ input[type="reset"]:focus-visible,
199
+ input[type="button"]:focus-visible,
200
+ input[type="file"]::file-selector-button:focus-visible {
201
+ outline-style: solid;
202
+ outline-width: 2px;
203
+ }
204
+ textarea,
205
+ select,
206
+ input {
207
+ color: #c9c9c9;
208
+ padding: 6px 10px;
209
+ margin-bottom: 10px;
210
+ background-color: #4a4a4a;
211
+ border: 1px solid #4a4a4a;
212
+ border-radius: 4px;
213
+ box-shadow: none;
214
+ box-sizing: border-box;
215
+ }
216
+ textarea:focus,
217
+ select:focus,
218
+ input:focus {
219
+ border: 1px solid #ffffff;
220
+ outline: 0;
221
+ }
222
+ input[type="checkbox"]:focus {
223
+ outline: 1px dotted #ffffff;
224
+ }
225
+ label,
226
+ legend,
227
+ fieldset {
228
+ display: block;
229
+ margin-bottom: 0.5rem;
230
+ font-weight: 600;
231
+ }
public/css/sakura.css ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* modified https://github.com/oxalorg/sakura */
2
+ :root {
3
+ --accent-color: #4a4a4a;
4
+ --accent-color-hover: #5a5a5a;
5
+ --link-color: #58739c;
6
+ --link-visted-color: #6f5e6f;
7
+ }
8
+ html {
9
+ font-size: 62.5%;
10
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
11
+ "Helvetica Neue", Arial, "Noto Sans", sans-serif;
12
+ }
13
+ body {
14
+ font-size: 1.8rem;
15
+ line-height: 1.618;
16
+ max-width: 38em;
17
+ margin: auto;
18
+ color: #4a4a4a;
19
+ background-color: #f9f9f9;
20
+ padding: 13px;
21
+ }
22
+ @media (max-width: 684px) {
23
+ body {
24
+ font-size: 1.53rem;
25
+ }
26
+ }
27
+ @media (max-width: 382px) {
28
+ body {
29
+ font-size: 1.35rem;
30
+ }
31
+ }
32
+ h1,
33
+ h2,
34
+ h3,
35
+ h4,
36
+ h5,
37
+ h6 {
38
+ line-height: 1.1;
39
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto,
40
+ "Helvetica Neue", Arial, "Noto Sans", sans-serif;
41
+ font-weight: 700;
42
+ margin-top: 3rem;
43
+ margin-bottom: 1.5rem;
44
+ overflow-wrap: break-word;
45
+ word-wrap: break-word;
46
+ -ms-word-break: break-all;
47
+ word-break: break-word;
48
+ }
49
+ h1 {
50
+ font-size: 2.35em;
51
+ }
52
+ h2 {
53
+ font-size: 2em;
54
+ }
55
+ h3 {
56
+ font-size: 1.75em;
57
+ }
58
+ h4 {
59
+ font-size: 1.5em;
60
+ }
61
+ h5 {
62
+ font-size: 1.25em;
63
+ }
64
+ h6 {
65
+ font-size: 1em;
66
+ }
67
+ p {
68
+ margin-top: 0;
69
+ margin-bottom: 2.5rem;
70
+ }
71
+ small,
72
+ sub,
73
+ sup {
74
+ font-size: 75%;
75
+ }
76
+ hr {
77
+ border-color: var(--accent-color);
78
+ }
79
+ a {
80
+ text-decoration: none;
81
+ color: var(--link-color);
82
+ }
83
+ a:visited {
84
+ color: var(--link-visted-color);
85
+ }
86
+ a:hover {
87
+ color: var(--accent-color-hover);
88
+ text-decoration: underline;
89
+ }
90
+ ul {
91
+ padding-left: 1.4em;
92
+ margin-top: 0;
93
+ margin-bottom: 2.5rem;
94
+ }
95
+ li {
96
+ margin-bottom: 0.4em;
97
+ }
98
+ blockquote {
99
+ margin-left: 0;
100
+ margin-right: 0;
101
+ padding-left: 1em;
102
+ padding-top: 0.8em;
103
+ padding-bottom: 0.8em;
104
+ padding-right: 0.8em;
105
+ border-left: 5px solid var(--accent-color);
106
+ margin-bottom: 2.5rem;
107
+ background-color: #f1f1f1;
108
+ }
109
+ blockquote p {
110
+ margin-bottom: 0;
111
+ }
112
+ img,
113
+ video {
114
+ height: auto;
115
+ max-width: 100%;
116
+ margin-top: 0;
117
+ margin-bottom: 2.5rem;
118
+ }
119
+ pre {
120
+ background-color: #f1f1f1;
121
+ display: block;
122
+ padding: 1em;
123
+ overflow-x: auto;
124
+ margin-top: 0;
125
+ margin-bottom: 2.5rem;
126
+ font-size: 0.9em;
127
+ }
128
+ code,
129
+ kbd,
130
+ samp {
131
+ font-size: 0.9em;
132
+ padding: 0 0.5em;
133
+ background-color: #f1f1f1;
134
+ white-space: pre-wrap;
135
+ }
136
+ pre > code {
137
+ padding: 0;
138
+ background-color: transparent;
139
+ white-space: pre;
140
+ font-size: 1em;
141
+ }
142
+ table {
143
+ text-align: justify;
144
+ width: 100%;
145
+ border-collapse: collapse;
146
+ margin-bottom: 2rem;
147
+ }
148
+ td,
149
+ th {
150
+ padding: 0.5em;
151
+ border-bottom: 1px solid #f1f1f1;
152
+ }
153
+ input,
154
+ textarea {
155
+ border: 1px solid #4a4a4a;
156
+ }
157
+ input:focus,
158
+ textarea:focus {
159
+ border: 1px solid var(--accent-color);
160
+ }
161
+ textarea {
162
+ width: 100%;
163
+ }
164
+ .button,
165
+ button,
166
+ input[type="submit"],
167
+ input[type="reset"],
168
+ input[type="button"],
169
+ input[type="file"]::file-selector-button {
170
+ display: inline-block;
171
+ padding: 5px 10px;
172
+ text-align: center;
173
+ text-decoration: none;
174
+ white-space: nowrap;
175
+ background-color: var(--accent-color);
176
+ color: #f9f9f9;
177
+ border-radius: 2px;
178
+ border: 1px solid var(--accent-color);
179
+ cursor: pointer;
180
+ box-sizing: border-box;
181
+ }
182
+ .button[disabled],
183
+ button[disabled],
184
+ input[type="submit"][disabled],
185
+ input[type="reset"][disabled],
186
+ input[type="button"][disabled],
187
+ input[type="file"][disabled] {
188
+ cursor: default;
189
+ opacity: 0.5;
190
+ }
191
+ .button:hover,
192
+ button:hover,
193
+ input[type="submit"]:hover,
194
+ input[type="reset"]:hover,
195
+ input[type="button"]:hover,
196
+ input[type="file"]::file-selector-button:hover {
197
+ background-color: var(--accent-color-hover);
198
+ color: #f9f9f9;
199
+ outline: 0;
200
+ }
201
+ .button:focus-visible,
202
+ button:focus-visible,
203
+ input[type="submit"]:focus-visible,
204
+ input[type="reset"]:focus-visible,
205
+ input[type="button"]:focus-visible,
206
+ input[type="file"]::file-selector-button:focus-visible {
207
+ outline-style: solid;
208
+ outline-width: 2px;
209
+ }
210
+ textarea,
211
+ select,
212
+ input {
213
+ color: #4a4a4a;
214
+ padding: 6px 10px;
215
+ margin-bottom: 10px;
216
+ background-color: #f1f1f1;
217
+ border: 1px solid #f1f1f1;
218
+ border-radius: 4px;
219
+ box-shadow: none;
220
+ box-sizing: border-box;
221
+ }
222
+ textarea:focus,
223
+ select:focus,
224
+ input:focus {
225
+ border: 1px solid var(--accent-color);
226
+ outline: 0;
227
+ }
228
+ input[type="checkbox"]:focus {
229
+ outline: 1px dotted var(--accent-color);
230
+ }
231
+ label,
232
+ legend,
233
+ fieldset {
234
+ display: block;
235
+ margin-bottom: 0.5rem;
236
+ font-weight: 600;
237
+ }
public/js/hash-worker.js ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
// Loads the argon2 implementation from hash-wasm; it attaches itself to the
// worker global as `self.hashwasm`.
importScripts(
  "https://cdn.jsdelivr.net/npm/hash-wasm@4.11.0/dist/argon2.umd.min.js"
);

// Whether the solve loop should keep running.
let active = false;
// Next candidate nonce to hash.
let nonce = 0;
// Challenge signature echoed from the "start" message.
let signature = "";
// Timestamp (ms) of the last "progress" message posted to the main thread.
let lastNotify = 0;
// Hashes computed since the last progress/paused notification.
let hashesSinceLastNotify = 0;
// Argon2 parameters for the current challenge; populated on "start".
let params = {
  salt: null,
  hashLength: 0,
  iterations: 0,
  memorySize: 0,
  parallelism: 0,
  targetValue: BigInt(0),
  safariFix: false,
};
19
+
20
// Control channel from the main thread: "start" begins solving a challenge,
// "stop" pauses the solver loop.
self.onmessage = async (event) => {
  const { data } = event;
  switch (data.type) {
    case "stop":
      active = false;
      // Report hashes done since the last progress message so the caller's
      // running total stays accurate.
      self.postMessage({ type: "paused", hashes: hashesSinceLastNotify });
      return;
    case "start":
      active = true;
      signature = data.signature;
      nonce = data.nonce;

      // The challenge salt (c.s) arrives hex-encoded; decode to raw bytes.
      const c = data.challenge;
      const salt = new Uint8Array(c.s.length / 2);
      for (let i = 0; i < c.s.length; i += 2) {
        salt[i / 2] = parseInt(c.s.slice(i, i + 2), 16);
      }

      params = {
        salt: salt,
        hashLength: c.hl,
        iterations: c.t,
        memorySize: c.m,
        parallelism: c.p,
        // Drops c.d's final character before BigInt parsing — presumably a
        // trailing "n" from BigInt serialization; confirm against server.
        targetValue: BigInt(c.d.slice(0, -1)),
        safariFix: data.isMobileWebkit,
      };

      console.log("Started", params);
      self.postMessage({ type: "started" });
      // Kick off the solve loop asynchronously so this handler returns first.
      setTimeout(solve, 0);
      break;
  }
};
54
+
55
// Computes the argon2id digest of the given password (a stringified nonce)
// using the parameters of the currently active challenge.
const doHash = async (password) =>
  self.hashwasm.argon2id({
    password,
    salt: params.salt,
    hashLength: params.hashLength,
    iterations: params.iterations,
    memorySize: params.memorySize,
    parallelism: params.parallelism,
  });
66
+
67
// True when the hex digest, interpreted as a big integer, does not exceed
// the challenge's difficulty target.
const checkHash = (hash) => BigInt("0x" + hash) <= params.targetValue;
72
+
73
// Main solver loop. Hashes one nonce per iteration (batchSize = 1 because
// Safari's WASM misbehaves with multiple concurrent calls in one worker),
// throttles progress reports to every 500ms, and posts "solved" when a hash
// meets the difficulty target. Reschedules itself via setTimeout so "stop"
// messages can be processed between iterations.
const solve = async () => {
  if (!active) {
    console.log("Stopped solver", nonce);
    return;
  }

  // Safari WASM doesn't like multiple calls in one worker
  const batchSize = 1;
  const batch = [];
  for (let i = 0; i < batchSize; i++) {
    batch.push(nonce++);
  }

  try {
    const results = await Promise.all(
      batch.map(async (nonce) => {
        const hash = await doHash(String(nonce));
        return { hash, nonce };
      })
    );
    hashesSinceLastNotify += batchSize;

    const solution = results.find(({ hash }) => checkHash(hash));
    if (solution) {
      console.log("Solution found", solution, params.salt);
      self.postMessage({ type: "solved", nonce: solution.nonce });
      active = false;
    } else {
      // Throttle progress messages to at most one every 500ms.
      if (Date.now() - lastNotify >= 500) {
        console.log("Last nonce", nonce, "Hashes", hashesSinceLastNotify);
        self.postMessage({ type: "progress", hashes: hashesSinceLastNotify });
        lastNotify = Date.now();
        hashesSinceLastNotify = 0;
      }
      // Yield to the event loop before the next hash attempt.
      setTimeout(solve, 10);
    }
  } catch (error) {
    console.error("Error", error);
    const stack = error.stack;
    const debug = {
      stack,
      lastNonce: nonce,
      targetValue: params.targetValue,
    };
    self.postMessage({ type: "error", error: error.message, debug });
    active = false;
  }
};
render.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Render.com blueprint: deploys the proxy as a free-tier Docker web service
# built from the repo's Render-specific Dockerfile.
services:
  - type: web
    name: oai-proxy
    env: docker
    repo: https://gitlab.com/khanon/oai-proxy.git
    region: oregon
    plan: free
    branch: main
    # Render polls this path to decide whether the instance is healthy.
    healthCheckPath: /health
    dockerfilePath: ./docker/render/Dockerfile
scripts/migrate.ts ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Database from "better-sqlite3";
2
+ import { DATABASE_VERSION, migrateDatabase } from "../src/shared/database";
3
+ import { logger } from "../src/logger";
4
+ import { config } from "../src/config";
5
+
6
+ const log = logger.child({ module: "scripts/migrate" });
7
+
8
+ async function runMigration() {
9
+ let targetVersion = Number(process.argv[2]) || undefined;
10
+
11
+ if (!targetVersion) {
12
+ log.info("Enter target version or leave empty to use the latest version.");
13
+ process.stdin.resume();
14
+ process.stdin.setEncoding("utf8");
15
+ const input = await new Promise<string>((resolve) => {
16
+ process.stdin.on("data", (text) => {
17
+ resolve((String(text) || "").trim());
18
+ });
19
+ });
20
+ process.stdin.pause();
21
+ targetVersion = Number(input);
22
+ if (!targetVersion) {
23
+ targetVersion = DATABASE_VERSION;
24
+ }
25
+ }
26
+
27
+ const db = new Database(config.sqliteDataPath, {
28
+ verbose: (msg, ...args) => log.debug({ args }, String(msg)),
29
+ });
30
+
31
+ const currentVersion = db.pragma("user_version", { simple: true });
32
+ log.info({ currentVersion, targetVersion }, "Running migrations.");
33
+ migrateDatabase(targetVersion, db);
34
+ }
35
+
36
+ runMigration().catch((error) => {
37
+ log.error(error, "Migration failed.");
38
+ process.exit(1);
39
+ });
scripts/oai-reverse-proxy.http ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OAI Reverse Proxy
2
+
3
+ ###
4
+ # @name OpenAI -- Chat Completions
5
+ POST https://api.openai.com/v1/chat/completions
6
+ Authorization: Bearer {{oai-key-1}}
7
+ Content-Type: application/json
8
+
9
+ {
10
+ "model": "gpt-3.5-turbo",
11
+ "max_tokens": 30,
12
+ "stream": false,
13
+ "messages": [
14
+ {
15
+ "role": "user",
16
+ "content": "This is a test prompt."
17
+ }
18
+ ]
19
+ }
20
+
21
+ ###
22
+ # @name OpenAI -- Text Completions
23
+ POST https://api.openai.com/v1/completions
24
+ Authorization: Bearer {{oai-key-1}}
25
+ Content-Type: application/json
26
+
27
+ {
28
+ "model": "gpt-3.5-turbo-instruct",
29
+ "max_tokens": 30,
30
+ "stream": false,
31
+ "prompt": "This is a test prompt where"
32
+ }
33
+
34
+ ###
35
+ # @name OpenAI -- Create Embedding
36
+ POST https://api.openai.com/v1/embeddings
37
+ Authorization: Bearer {{oai-key-1}}
38
+ Content-Type: application/json
39
+
40
+ {
41
+ "model": "text-embedding-ada-002",
42
+ "input": "This is a test embedding input."
43
+ }
44
+
45
+ ###
46
+ # @name OpenAI -- Get Organizations
47
+ GET https://api.openai.com/v1/organizations
48
+ Authorization: Bearer {{oai-key-1}}
49
+
50
+ ###
51
+ # @name OpenAI -- Get Models
52
+ GET https://api.openai.com/v1/models
53
+ Authorization: Bearer {{oai-key-1}}
54
+
55
+ ###
56
+ # @name Azure OpenAI -- Chat Completions
57
+ POST https://{{azu-resource-name}}.openai.azure.com/openai/deployments/{{azu-deployment-id}}/chat/completions?api-version=2023-09-01-preview
58
+ api-key: {{azu-key-1}}
59
+ Content-Type: application/json
60
+
61
+ {
62
+ "max_tokens": 1,
63
+ "stream": false,
64
+ "messages": [
65
+ {
66
+ "role": "user",
67
+ "content": "This is a test prompt."
68
+ }
69
+ ]
70
+ }
71
+
72
+ ###
73
+ # @name Proxy / OpenAI -- Get Models
74
+ GET {{proxy-host}}/proxy/openai/v1/models
75
+ Authorization: Bearer {{proxy-key}}
76
+
77
+ ###
78
+ # @name Proxy / OpenAI -- Native Chat Completions
79
+ POST {{proxy-host}}/proxy/openai/chat/completions
80
+ Authorization: Bearer {{proxy-key}}
81
+ Content-Type: application/json
82
+
83
+ {
84
+ "model": "gpt-4-1106-preview",
85
+ "max_tokens": 20,
86
+ "stream": true,
87
+ "temperature": 1,
88
+ "seed": 123,
89
+ "messages": [
90
+ {
91
+ "role": "user",
92
+ "content": "phrase one"
93
+ }
94
+ ]
95
+ }
96
+
97
+ ###
98
+ # @name Proxy / OpenAI -- Native Text Completions
99
+ POST {{proxy-host}}/proxy/openai/v1/turbo-instruct/chat/completions
100
+ Authorization: Bearer {{proxy-key}}
101
+ Content-Type: application/json
102
+
103
+ {
104
+ "model": "gpt-3.5-turbo-instruct",
105
+ "max_tokens": 20,
106
+ "temperature": 0,
107
+ "prompt": "Genshin Impact is a game about",
108
+ "stream": false
109
+ }
110
+
111
+ ###
112
+ # @name Proxy / OpenAI -- Chat-to-Text API Translation
113
+ # Accepts a chat completion request and reformats it to work with the text completion API. `model` is ignored.
114
+ POST {{proxy-host}}/proxy/openai/turbo-instruct/chat/completions
115
+ Authorization: Bearer {{proxy-key}}
116
+ Content-Type: application/json
117
+
118
+ {
119
+ "model": "gpt-4",
120
+ "max_tokens": 20,
121
+ "stream": true,
122
+ "messages": [
123
+ {
124
+ "role": "user",
125
+ "content": "What is the name of the fourth president of the united states?"
126
+ },
127
+ {
128
+ "role": "assistant",
129
+ "content": "That would be George Washington."
130
+ },
131
+ {
132
+ "role": "user",
133
+ "content": "I don't think that's right..."
134
+ }
135
+ ]
136
+ }
137
+
138
+ ###
139
+ # @name Proxy / OpenAI -- Create Embedding
140
+ POST {{proxy-host}}/proxy/openai/embeddings
141
+ Authorization: Bearer {{proxy-key}}
142
+ Content-Type: application/json
143
+
144
+ {
145
+ "model": "text-embedding-ada-002",
146
+ "input": "This is a test embedding input."
147
+ }
148
+
149
+
150
+ ###
151
+ # @name Proxy / Anthropic -- Native Completion (old API)
152
+ POST {{proxy-host}}/proxy/anthropic/v1/complete
153
+ Authorization: Bearer {{proxy-key}}
154
+ anthropic-version: 2023-01-01
155
+ Content-Type: application/json
156
+
157
+ {
158
+ "model": "claude-v1.3",
159
+ "max_tokens_to_sample": 20,
160
+ "temperature": 0.2,
161
+ "stream": true,
162
+ "prompt": "What is genshin impact\n\nAssistant:"
163
+ }
164
+
165
+ ###
166
+ # @name Proxy / Anthropic -- Native Completion (2023-06-01 API)
167
+ POST {{proxy-host}}/proxy/anthropic/v1/complete
168
+ Authorization: Bearer {{proxy-key}}
169
+ anthropic-version: 2023-06-01
170
+ Content-Type: application/json
171
+
172
+ {
173
+ "model": "claude-v1.3",
174
+ "max_tokens_to_sample": 20,
175
+ "temperature": 0.2,
176
+ "stream": true,
177
+ "prompt": "What is genshin impact\n\nAssistant:"
178
+ }
179
+
180
+ ###
181
+ # @name Proxy / Anthropic -- OpenAI-to-Anthropic API Translation
182
+ POST {{proxy-host}}/proxy/anthropic/v1/chat/completions
183
+ Authorization: Bearer {{proxy-key}}
184
+ #anthropic-version: 2023-06-01
185
+ Content-Type: application/json
186
+
187
+ {
188
+ "model": "gpt-3.5-turbo",
189
+ "max_tokens": 20,
190
+ "stream": false,
191
+ "temperature": 0,
192
+ "messages": [
193
+ {
194
+ "role": "user",
195
+ "content": "What is genshin impact"
196
+ }
197
+ ]
198
+ }
199
+
200
+ ###
201
+ # @name Proxy / AWS Claude -- Native Completion
202
+ POST {{proxy-host}}/proxy/aws/claude/v1/complete
203
+ Authorization: Bearer {{proxy-key}}
204
+ anthropic-version: 2023-01-01
205
+ Content-Type: application/json
206
+
207
+ {
208
+ "model": "claude-v2",
209
+ "max_tokens_to_sample": 10,
210
+ "temperature": 0,
211
+ "stream": true,
212
+ "prompt": "What is genshin impact\n\nAssistant:"
213
+ }
214
+
215
+ ###
216
+ # @name Proxy / AWS Claude -- OpenAI-to-Anthropic API Translation
217
+ POST {{proxy-host}}/proxy/aws/claude/chat/completions
218
+ Authorization: Bearer {{proxy-key}}
219
+ Content-Type: application/json
220
+
221
+ {
222
+ "model": "gpt-3.5-turbo",
223
+ "max_tokens": 50,
224
+ "stream": true,
225
+ "messages": [
226
+ {
227
+ "role": "user",
228
+ "content": "What is genshin impact?"
229
+ }
230
+ ]
231
+ }
232
+
233
+ ###
234
+ # @name Proxy / GCP Claude -- Native Completion
235
+ POST {{proxy-host}}/proxy/gcp/claude/v1/complete
236
+ Authorization: Bearer {{proxy-key}}
237
+ anthropic-version: 2023-01-01
238
+ Content-Type: application/json
239
+
240
+ {
241
+ "model": "claude-v2",
242
+ "max_tokens_to_sample": 10,
243
+ "temperature": 0,
244
+ "stream": true,
245
+ "prompt": "What is genshin impact\n\nAssistant:"
246
+ }
247
+
248
+ ###
249
+ # @name Proxy / GCP Claude -- OpenAI-to-Anthropic API Translation
250
+ POST {{proxy-host}}/proxy/gcp/claude/chat/completions
251
+ Authorization: Bearer {{proxy-key}}
252
+ Content-Type: application/json
253
+
254
+ {
255
+ "model": "gpt-3.5-turbo",
256
+ "max_tokens": 50,
257
+ "stream": true,
258
+ "messages": [
259
+ {
260
+ "role": "user",
261
+ "content": "What is genshin impact?"
262
+ }
263
+ ]
264
+ }
265
+
266
+ ###
267
+ # @name Proxy / Azure OpenAI -- Native Chat Completions
268
+ POST {{proxy-host}}/proxy/azure/openai/chat/completions
269
+ Authorization: Bearer {{proxy-key}}
270
+ Content-Type: application/json
271
+
272
+ {
273
+ "model": "gpt-4",
274
+ "max_tokens": 20,
275
+ "stream": true,
276
+ "temperature": 1,
277
+ "seed": 2,
278
+ "messages": [
279
+ {
280
+ "role": "user",
281
+ "content": "Hi what is the name of the fourth president of the united states?"
282
+ },
283
+ {
284
+ "role": "assistant",
285
+ "content": "That would be George Washington."
286
+ },
287
+ {
288
+ "role": "user",
289
+ "content": "That's not right."
290
+ }
291
+ ]
292
+ }
293
+
294
+ ###
295
+ # @name Proxy / Google AI -- OpenAI-to-Google AI API Translation
296
+ POST {{proxy-host}}/proxy/google-ai/v1/chat/completions
297
+ Authorization: Bearer {{proxy-key}}
298
+ Content-Type: application/json
299
+
300
+ {
301
+ "model": "gpt-4",
302
+ "max_tokens": 42,
303
+ "messages": [
304
+ {
305
+ "role": "user",
306
+ "content": "Hi what is the name of the fourth president of the united states?"
307
+ }
308
+ ]
309
+ }
scripts/seed-events.ts ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Database from "better-sqlite3";
2
+ import { v4 as uuidv4 } from "uuid";
3
+ import { config } from "../src/config";
4
+
5
+ function generateRandomIP() {
6
+ return (
7
+ Math.floor(Math.random() * 255) +
8
+ "." +
9
+ Math.floor(Math.random() * 255) +
10
+ "." +
11
+ Math.floor(Math.random() * 255) +
12
+ "." +
13
+ Math.floor(Math.random() * 255)
14
+ );
15
+ }
16
+
17
+ function generateRandomDate() {
18
+ const end = new Date();
19
+ const start = new Date(end);
20
+ start.setDate(end.getDate() - 90);
21
+ const randomDate = new Date(
22
+ start.getTime() + Math.random() * (end.getTime() - start.getTime())
23
+ );
24
+ return randomDate.toISOString();
25
+ }
26
+
27
+ function generateMockSHA256() {
28
+ const characters = 'abcdef0123456789';
29
+ let hash = '';
30
+
31
+ for (let i = 0; i < 64; i++) {
32
+ const randomIndex = Math.floor(Math.random() * characters.length);
33
+ hash += characters[randomIndex];
34
+ }
35
+
36
+ return hash;
37
+ }
38
+
39
+ function getRandomModelFamily() {
40
+ const modelFamilies = [
41
+ "turbo",
42
+ "gpt4",
43
+ "gpt4-32k",
44
+ "gpt4-turbo",
45
+ "claude",
46
+ "claude-opus",
47
+ "gemini-pro",
48
+ "mistral-tiny",
49
+ "mistral-small",
50
+ "mistral-medium",
51
+ "mistral-large",
52
+ "aws-claude",
53
+ "aws-claude-opus",
54
+ "gcp-claude",
55
+ "gcp-claude-opus",
56
+ "azure-turbo",
57
+ "azure-gpt4",
58
+ "azure-gpt4-32k",
59
+ "azure-gpt4-turbo",
60
+ "dall-e",
61
+ "azure-dall-e",
62
+ ];
63
+ return modelFamilies[Math.floor(Math.random() * modelFamilies.length)];
64
+ }
65
+
66
// Seeds the `events` table with mock rows for local development of the
// admin event viewer. Uses a single transaction for the batch insert.
(async () => {
  const db = new Database(config.sqliteDataPath);
  const numRows = 100;
  const insertStatement = db.prepare(`
    INSERT INTO events (type, ip, date, model, family, hashes, userToken, inputTokens, outputTokens)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
  `);

  // Fixed pool of 10 fake user tokens so rows cluster by user.
  const users = Array.from({ length: 10 }, () => uuidv4());
  function getRandomUser() {
    return users[Math.floor(Math.random() * users.length)];
  }

  const transaction = db.transaction(() => {
    for (let i = 0; i < numRows; i++) {
      insertStatement.run(
        "chat_completion",
        generateRandomIP(),
        generateRandomDate(),
        // Model name = family plus a random numeric suffix.
        getRandomModelFamily() + "-" + Math.floor(Math.random() * 100),
        getRandomModelFamily(),
        // 0-9 comma-separated mock prompt hashes.
        Array.from(
          { length: Math.floor(Math.random() * 10) },
          generateMockSHA256
        ).join(","),
        getRandomUser(),
        Math.floor(Math.random() * 500),
        Math.floor(Math.random() * 6000)
      );
    }
  });

  transaction();

  console.log(`Inserted ${numRows} rows into the events table.`);
  db.close();
})();
scripts/test-aws-signing.ts ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // uses the aws sdk to sign a request, then uses axios to send it to the bedrock REST API manually
2
+ import axios from "axios";
3
+ import { Sha256 } from "@aws-crypto/sha256-js";
4
+ import { SignatureV4 } from "@smithy/signature-v4";
5
+ import { HttpRequest } from "@smithy/protocol-http";
6
+
7
+ const AWS_ACCESS_KEY_ID = process.env.AWS_ACCESS_KEY_ID!;
8
+ const AWS_SECRET_ACCESS_KEY = process.env.AWS_SECRET_ACCESS_KEY!;
9
+
10
+ // Copied from amazon bedrock docs
11
+
12
+ // List models
13
+ // ListFoundationModels
14
+ // Service: Amazon Bedrock
15
+ // List of Bedrock foundation models that you can use. For more information, see Foundation models in the
16
+ // Bedrock User Guide.
17
+ // Request Syntax
18
+ // GET /foundation-models?
19
+ // byCustomizationType=byCustomizationType&byInferenceType=byInferenceType&byOutputModality=byOutputModality&byProvider=byProvider
20
+ // HTTP/1.1
21
+ // URI Request Parameters
22
+ // The request uses the following URI parameters.
23
+ // byCustomizationType (p. 38)
24
+ // List by customization type.
25
+ // Valid Values: FINE_TUNING
26
+ // byInferenceType (p. 38)
27
+ // List by inference type.
28
+ // Valid Values: ON_DEMAND | PROVISIONED
29
+ // byOutputModality (p. 38)
30
+ // List by output modality type.
31
+ // Valid Values: TEXT | IMAGE | EMBEDDING
32
+ // byProvider (p. 38)
33
+ // A Bedrock model provider.
34
+ // Pattern: ^[a-z0-9-]{1,63}$
35
+ // Request Body
36
+ // The request does not have a request body
37
+
38
+ // Run inference on a text model
39
+ // Send an invoke request to run inference on a Titan Text G1 - Express model. We set the accept
40
+ // parameter to accept any content type in the response.
41
+ // POST https://bedrock.us-east-1.amazonaws.com/model/amazon.titan-text-express-v1/invoke
42
+ // -H accept: */*
43
+ // -H content-type: application/json
44
+ // Payload
45
+ // {"inputText": "Hello world"}
46
+ // Example response
47
+ // Response for the above request.
48
+ // -H content-type: application/json
49
+ // Payload
50
+ // <the model response>
51
+
52
const AMZ_REGION = "us-east-1";
// NOTE(review): "invoke-bedrock.*" does not match the current Bedrock runtime
// endpoint ("bedrock-runtime.<region>.amazonaws.com") — confirm which API
// revision this test script targets before relying on it.
const AMZ_HOST = "invoke-bedrock.us-east-1.amazonaws.com";
54
+
55
// Lists available Bedrock foundation models by manually SigV4-signing a GET
// request and sending it with axios instead of the AWS SDK's HTTP client.
async function listModels() {
  const httpRequest = new HttpRequest({
    method: "GET",
    protocol: "https:",
    hostname: AMZ_HOST,
    path: "/foundation-models",
    // Host header is included so it participates in the SigV4 signature.
    headers: { ["Host"]: AMZ_HOST },
  });

  const signedRequest = await signRequest(httpRequest);
  const response = await axios.get(
    `https://${signedRequest.hostname}${signedRequest.path}`,
    { headers: signedRequest.headers }
  );
  console.log(response.data);
}
71
+
72
// Invokes a Claude model via the raw Bedrock REST API using a manually
// signed POST, then logs the status, headers, body, and final request URL.
async function invokeModel() {
  const model = "anthropic.claude-v1";
  const httpRequest = new HttpRequest({
    method: "POST",
    protocol: "https:",
    hostname: AMZ_HOST,
    path: `/model/${model}/invoke`,
    headers: {
      ["Host"]: AMZ_HOST,
      ["accept"]: "*/*",
      ["content-type"]: "application/json",
    },
    // Anthropic-on-Bedrock completion payload (legacy prompt format).
    body: JSON.stringify({
      temperature: 0.5,
      prompt: "\n\nHuman:Hello world\n\nAssistant:",
      max_tokens_to_sample: 10,
    }),
  });
  console.log("httpRequest", httpRequest);

  const signedRequest = await signRequest(httpRequest);
  const response = await axios.post(
    `https://${signedRequest.hostname}${signedRequest.path}`,
    signedRequest.body,
    { headers: signedRequest.headers }
  );
  console.log(response.status);
  console.log(response.headers);
  console.log(response.data);
  console.log("full url", response.request.res.responseUrl);
}
103
+
104
// SigV4-signs an HttpRequest for the Bedrock service using credentials
// taken from the environment at module load.
async function signRequest(request: HttpRequest) {
  const signer = new SignatureV4({
    sha256: Sha256,
    credentials: {
      accessKeyId: AWS_ACCESS_KEY_ID,
      secretAccessKey: AWS_SECRET_ACCESS_KEY,
    },
    region: AMZ_REGION,
    service: "bedrock",
  });
  return await signer.sign(request, { signingDate: new Date() });
}
116
+
117
+ // listModels();
118
+ // invokeModel();
scripts/test-concurrency.js ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
const axios = require("axios");

// Number of simultaneous requests to fire at the local proxy.
const concurrentRequests = 75;
// NOTE(review): assumes the proxy accepts this placeholder bearer token —
// confirm the auth mode of the local instance under test.
const headers = {
  Authorization: "Bearer test",
  "Content-Type": "application/json",
};

// Minimal one-token chat completion so each request is cheap.
const payload = {
  model: "gpt-4",
  max_tokens: 1,
  stream: false,
  messages: [{ role: "user", content: "Hi" }],
};
15
+
16
// Sends one chat-completion request to the local proxy and logs the outcome.
// On failure, logs just the response status and body instead of the entire
// Axios response object (which is enormous and contains circular references).
const makeRequest = async (i) => {
  try {
    const response = await axios.post(
      "http://localhost:7860/proxy/google-ai/v1/chat/completions",
      payload,
      { headers }
    );
    console.log(
      `Req ${i} finished with status code ${response.status} and response:`,
      response.data
    );
  } catch (error) {
    const details = error.response
      ? { status: error.response.status, data: error.response.data }
      : "";
    console.error(`Error in req ${i}:`, error.message, details);
  }
};
32
+
33
// Fires all requests at once and reports when every one has settled.
const executeRequestsConcurrently = () => {
  const inFlight = Array.from({ length: concurrentRequests }, (_, idx) => {
    const reqNumber = idx + 1;
    console.log(`Starting request ${reqNumber}`);
    return makeRequest(reqNumber);
  });

  Promise.all(inFlight).then(() => {
    console.log("All requests finished");
  });
};

executeRequestsConcurrently();
scripts/test-queue.js ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const axios = require("axios");
2
+
3
// Returns a uniformly random integer in the inclusive range [1, max].
function randomInteger(max) {
  return 1 + Math.floor(Math.random() * max);
}
6
+
7
/**
 * Fires 10 concurrent chat-completion requests at the local proxy, each with
 * a random max_tokens and a random X-Forwarded-For (so each looks like a
 * distinct client), then checks each response against the exact expected
 * sentence to detect mangled output from the queue.
 */
async function testQueue() {
  const requests = Array(10).fill(undefined).map(async function () {
    const maxTokens = randomInteger(2000);

    const headers = {
      "Authorization": "Bearer test",
      "Content-Type": "application/json",
      "X-Forwarded-For": `${randomInteger(255)}.${randomInteger(255)}.${randomInteger(255)}.${randomInteger(255)}`,
    };

    const payload = {
      model: "gpt-4o-mini-2024-07-18",
      max_tokens: 20 + maxTokens,
      stream: false,
      messages: [{role: "user", content: "You are being benchmarked regarding your reliability at outputting exact, machine-comprehensible data. Output the sentence \"The quick brown fox jumps over the lazy dog.\" Do not precede it with quotemarks or any form of preamble, and do not output anything after the sentence."}],
      temperature: 0,
    };

    try {
      const response = await axios.post(
        "http://localhost:7860/proxy/openai/v1/chat/completions",
        payload,
        { headers }
      );

      if (response.status !== 200) {
        // Fixed: was `{$maxTokens}`, which printed literal text instead of
        // interpolating the value.
        console.error(`Request ${maxTokens} finished with status code ${response.status} and response`, response.data);
        return;
      }

      const content = response.data.choices[0].message.content;

      console.log(
        `Request ${maxTokens} `,
        content === "The quick brown fox jumps over the lazy dog." ? "OK" : `mangled: ${content}`
      );
    } catch (error) {
      // Axios throws on non-2xx; log the response object when one exists.
      const msg = error.response;
      console.error(`Error in req ${maxTokens}:`, error.message, msg || "");
    }
  });

  await Promise.all(requests);
  console.log("All requests finished");
}

testQueue();
src/admin/api/events.ts ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { Router } from "express";
import { z } from "zod";
import { encodeCursor, decodeCursor } from "../../shared/utils";
import { eventsRepo } from "../../shared/database/repos/event";

const router = Router();

/**
 * Returns events for the given user token.
 * GET /admin/events/:token
 * @query first - The number of events to return.
 * @query after - The cursor to start returning events from (exclusive).
 */
router.get("/:token", (req, res) => {
  const schema = z.object({
    token: z.string(),
    first: z.coerce.number().int().positive().max(200).default(25),
    after: z
      .string()
      .optional()
      .transform((v) => {
        try {
          return decodeCursor(v);
        } catch {
          // A malformed cursor is treated as "no cursor" rather than a 400.
          return null;
        }
      })
      .nullable(),
    // NOTE(review): `sort` is accepted but never used below — confirm whether
    // the repo should receive it or the field should be dropped.
    sort: z.string().optional(),
  });
  const args = schema.safeParse({ ...req.params, ...req.query });
  if (!args.success) {
    return res.status(400).json({ error: args.error });
  }

  // Relay-style pagination: each node is paired with an opaque cursor
  // derived from its date.
  const data = eventsRepo
    .getUserEvents(args.data.token, {
      limit: args.data.first,
      cursor: args.data.after,
    })
    .map((e) => ({ node: e, cursor: encodeCursor(e.date) }));

  res.json({
    data,
    // Cursor of the last event in this page; undefined when the page is empty.
    endCursor: data[data.length - 1]?.cursor,
  });
});

export { router as eventsApiRouter };
src/admin/api/users.ts ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import { Router } from "express";
import { z } from "zod";
import * as userStore from "../../shared/users/user-store";
import { parseSort, sortBy } from "../../shared/utils";
import { UserPartialSchema, UserSchema } from "../../shared/users/schema";

const router = Router();

/**
 * Returns a list of all users, sorted by prompt count and then last used time.
 * GET /admin/users
 */
router.get("/", (req, res) => {
  // An explicit `sort` query param overrides the default ordering.
  const sort = parseSort(req.query.sort) || ["promptCount", "lastUsedAt"];
  const users = userStore.getUsers().sort(sortBy(sort, false));
  res.json({ users, count: users.length });
});

/**
 * Returns the user with the given token.
 * GET /admin/users/:token
 */
router.get("/:token", (req, res) => {
  const user = userStore.getUser(req.params.token);
  if (!user) {
    return res.status(404).json({ error: "Not found" });
  }
  res.json(user);
});

/**
 * Creates a new user.
 * Optionally accepts a JSON body containing `type`, and for temporary-type
 * users, `tokenLimits` and `expiresAt` fields.
 * Returns the created user's token.
 * POST /admin/users
 */
router.post("/", (req, res) => {
  const body = req.body;

  // Non-temporary users: only `type` is accepted, defaulting to "normal".
  const base = z.object({
    type: UserSchema.shape.type.exclude(["temporary"]).default("normal"),
  });
  // Temporary users must additionally supply expiry and token limits.
  const tempUser = base
    .extend({
      type: z.literal("temporary"),
      expiresAt: UserSchema.shape.expiresAt,
      tokenLimits: UserSchema.shape.tokenLimits,
    })
    .required();

  const schema = z.union([base, tempUser]);
  const result = schema.safeParse(body);
  if (!result.success) {
    return res.status(400).json({ error: result.error });
  }

  const token = userStore.createUser({ ...result.data });
  res.json({ token });
});

/**
 * Updates the user with the given token, creating them if they don't exist.
 * Accepts a JSON body containing at least one field on the User type.
 * Returns the upserted user.
 * PUT /admin/users/:token
 */
router.put("/:token", (req, res) => {
  // The token from the URL always wins over any token in the body.
  const result = UserPartialSchema.safeParse({
    ...req.body,
    token: req.params.token,
  });
  if (!result.success) {
    return res.status(400).json({ error: result.error });
  }
  userStore.upsertUser(result.data);
  res.json(userStore.getUser(req.params.token));
});

/**
 * Bulk-upserts users given a list of User updates.
 * Accepts a JSON body with the field `users` containing an array of updates.
 * Returns an object containing the upserted users and the number of upserts.
 * PUT /admin/users
 */
router.put("/", (req, res) => {
  const result = z.array(UserPartialSchema).safeParse(req.body.users);
  if (!result.success) {
    return res.status(400).json({ error: result.error });
  }
  const upserts = result.data.map((user) => userStore.upsertUser(user));
  res.json({ upserted_users: upserts, count: upserts.length });
});

/**
 * Disables the user with the given token. Optionally accepts a `disabledReason`
 * query parameter.
 * Returns the disabled user.
 * DELETE /admin/users/:token
 */
router.delete("/:token", (req, res) => {
  const user = userStore.getUser(req.params.token);
  // Validate the reason before the existence check so a bad query string
  // yields a 400 even for unknown tokens.
  const disabledReason = z
    .string()
    .optional()
    .safeParse(req.query.disabledReason);
  if (!disabledReason.success) {
    return res.status(400).json({ error: disabledReason.error });
  }
  if (!user) {
    return res.status(404).json({ error: "Not found" });
  }
  userStore.disableUser(req.params.token, disabledReason.data);
  res.json(userStore.getUser(req.params.token));
});

export { router as usersApiRouter };
src/admin/auth.ts ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Request, Response, RequestHandler } from "express";
2
+ import { config } from "../config";
3
+
4
const ADMIN_KEY = config.adminKey;
// Per-IP count of failed admin login attempts (in-memory; resets on restart).
const failedAttempts = new Map<string, number>();

// "cookie" authenticates the web admin UI via session; "header" the REST API
// via Authorization bearer token.
type AuthorizeParams = { via: "cookie" | "header" };
8
+
9
+ export const authorize: ({ via }: AuthorizeParams) => RequestHandler =
10
+ ({ via }) =>
11
+ (req, res, next) => {
12
+ const bearerToken = req.headers.authorization?.slice("Bearer ".length);
13
+ const cookieToken = req.session.adminToken;
14
+ const token = via === "cookie" ? cookieToken : bearerToken;
15
+ const attempts = failedAttempts.get(req.ip) ?? 0;
16
+
17
+ if (!ADMIN_KEY) {
18
+ req.log.warn(
19
+ { ip: req.ip },
20
+ `Blocked admin request because no admin key is configured`
21
+ );
22
+ return res.status(401).json({ error: "Unauthorized" });
23
+ }
24
+
25
+ if (attempts > 5) {
26
+ req.log.warn(
27
+ { ip: req.ip, token: bearerToken },
28
+ `Blocked admin request due to too many failed attempts`
29
+ );
30
+ return res.status(401).json({ error: "Too many attempts" });
31
+ }
32
+
33
+ if (token && token === ADMIN_KEY) {
34
+ return next();
35
+ }
36
+
37
+ req.log.warn(
38
+ { ip: req.ip, attempts, invalidToken: String(token) },
39
+ `Attempted admin request with invalid token`
40
+ );
41
+ return handleFailedLogin(req, res);
42
+ };
43
+
44
+ function handleFailedLogin(req: Request, res: Response) {
45
+ const attempts = failedAttempts.get(req.ip) ?? 0;
46
+ const newAttempts = attempts + 1;
47
+ failedAttempts.set(req.ip, newAttempts);
48
+ if (req.accepts("json", "html") === "json") {
49
+ return res.status(401).json({ error: "Unauthorized" });
50
+ }
51
+ delete req.session.adminToken;
52
+ req.session.flash = { type: "error", message: `Invalid admin key.` };
53
+ return res.redirect("/admin/login");
54
+ }