diff --git a/.README.md.swp b/.README.md.swp
new file mode 100644
index 0000000000000000000000000000000000000000..a40e22da244613637b97bf709f6fc4153804a6c3
Binary files /dev/null and b/.README.md.swp differ
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000000000000000000000000000000000000..3b63869c22e469972bf4b03a2b50bcf9cd44c012
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,26 @@
+# Port
+PORT=8000
+
+# Image
+CHATMOCK_IMAGE=storagetime/chatmock:latest
+
+# Auth dir
+CHATGPT_LOCAL_HOME=/data
+
+# show request/stream logs
+VERBOSE=false
+
+# OAuth client id (modify only if you know what you're doing)
+# CHATGPT_LOCAL_CLIENT_ID=app_EMoamEEZ73f0CkXaXp7hrann
+
+# Reasoning controls
+CHATGPT_LOCAL_REASONING_EFFORT=medium       # none|minimal|low|medium|high|xhigh
+CHATGPT_LOCAL_REASONING_SUMMARY=auto        # auto|concise|detailed|none
+CHATGPT_LOCAL_REASONING_COMPAT=think-tags   # legacy|o3|think-tags|current
+CHATGPT_LOCAL_EXPOSE_REASONING_MODELS=false
+
+# Enable default web search tool
+CHATGPT_LOCAL_ENABLE_WEB_SEARCH=false
+
+# Force a specific model name
+# CHATGPT_LOCAL_DEBUG_MODEL=gpt-5.4
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000000000000000000000000000000000000..46e3b7675a6e1535e3db93e9a295b5721c27b70d
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,20 @@
+name: ci
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  # Runs on pushes to main and on every pull request: installs the package,
+  # runs the unittest suite, and checks that a distribution can be built.
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - uses: astral-sh/setup-uv@v5
+      # Install into the runner's interpreter (no virtualenv) so the plain
+      # `python` in the next step can import the package.
+      - run: uv pip install --system .
+      - run: python -m unittest discover -s tests
+      - run: uv build
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000000000000000000000000000000000000..78fb81998d8bb246e944cb49f81c3a99e46b5bb6
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,229 @@
+name: release
+
+on:
+  push:
+    tags:
+      - "v*"
+
+permissions:
+  contents: write
+
+jobs:
+  # Gate for every other job: checks that the pushed tag matches the package
+  # version, exposes both as outputs, then runs the unittest suite.
+  validate:
+    runs-on: ubuntu-latest
+    outputs:
+      version: ${{ steps.version.outputs.version }}
+      tag: ${{ steps.version.outputs.tag }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      # Strips the leading "v" from the tag name and compares it with
+      # __version__ read from chatmock/version.py; fails the job on mismatch.
+      - id: version
+        run: |
+          VERSION="${GITHUB_REF_NAME#v}"
+          PACKAGE_VERSION="$(python - <<'PY'
+          import runpy
+          print(runpy.run_path("chatmock/version.py")["__version__"])
+          PY
+          )"
+          if [ "$VERSION" != "$PACKAGE_VERSION" ]; then
+            echo "Tag version $VERSION does not match package version $PACKAGE_VERSION" >&2
+            exit 1
+          fi
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
+      - uses: astral-sh/setup-uv@v5
+      - run: uv pip install --system .
+      - run: python -m unittest discover -s tests
+
+  # Builds the Python distribution with `uv build` and uploads dist/* as the
+  # "python-dist" artifact consumed by publish-pypi and release-assets.
+  build-python:
+    needs: validate
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - uses: astral-sh/setup-uv@v5
+      - run: uv build
+      - uses: actions/upload-artifact@v4
+        with:
+          name: python-dist
+          path: dist/*
+
+  # Downloads the "python-dist" artifact and publishes it with the PyPA
+  # publish action.
+  publish-pypi:
+    needs:
+      - validate
+      - build-python
+    runs-on: ubuntu-latest
+    # Job-level permissions replace the workflow-level block for this job.
+    # NOTE(review): no API token is passed to the publish step, so id-token
+    # is presumably for PyPI trusted publishing (OIDC) - confirm.
+    permissions:
+      id-token: write
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: python-dist
+          path: dist
+      - uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist
+
+  # Builds the GUI app on Windows with build.py, zips dist/ChatMock, and
+  # uploads the archive as the "windows-gui" artifact.
+  build-windows:
+    needs: validate
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - run: python -m pip install --upgrade pip
+      # Installs the package together with its "gui" extra.
+      - run: python -m pip install ".[gui]"
+      - run: python build.py --name ChatMock
+      - run: Compress-Archive -Path dist/ChatMock -DestinationPath dist/ChatMock-windows.zip
+        shell: pwsh
+      - uses: actions/upload-artifact@v4
+        with:
+          name: windows-gui
+          path: dist/ChatMock-windows.zip
+
+  # Builds the macOS app, signs it, wraps it in a signed DMG, submits the DMG
+  # for notarization, staples the result, and uploads dist/ChatMock.dmg as the
+  # "macos-gui" artifact. All Apple credentials come from repository secrets.
+  build-macos:
+    needs: validate
+    runs-on: macos-latest
+    env:
+      APPLE_CERTIFICATE_P12_BASE64: ${{ secrets.APPLE_CERTIFICATE_P12_BASE64 }}
+      APPLE_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }}
+      APPLE_SIGNING_IDENTITY: ${{ secrets.APPLE_SIGNING_IDENTITY }}
+      APPLE_ID: ${{ secrets.APPLE_ID }}
+      APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
+      APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - run: python -m pip install --upgrade pip
+      - run: python -m pip install ".[gui]"
+      # Creates a throwaway default keychain (its password is the RUNNER_TEMP
+      # path), decodes the base64 .p12 secret into RUNNER_TEMP, and imports it
+      # so codesign can use the signing identity.
+      - run: |
+          security create-keychain -p "$RUNNER_TEMP" build.keychain
+          security default-keychain -s build.keychain
+          security unlock-keychain -p "$RUNNER_TEMP" build.keychain
+          security set-keychain-settings -lut 21600 build.keychain
+          python - <<'PY'
+          import base64
+          import os
+          from pathlib import Path
+          data = os.environ["APPLE_CERTIFICATE_P12_BASE64"]
+          Path(os.environ["RUNNER_TEMP"], "chatmock-signing.p12").write_bytes(base64.b64decode(data))
+          PY
+          security import "$RUNNER_TEMP/chatmock-signing.p12" -k build.keychain -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign -T /usr/bin/security
+          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$RUNNER_TEMP" build.keychain
+      # The app is built and signed first; the DMG is created afterwards with
+      # --dmg-only so that it packages the already-signed bundle.
+      - run: python build.py --name ChatMock
+      - run: codesign --force --deep --options runtime --sign "$APPLE_SIGNING_IDENTITY" dist/ChatMock.app
+      - run: codesign --verify --deep --strict dist/ChatMock.app
+      - run: python build.py --name ChatMock --dmg-only
+      - run: codesign --force --sign "$APPLE_SIGNING_IDENTITY" dist/ChatMock.dmg
+      - run: codesign --verify --strict dist/ChatMock.dmg
+      - run: xcrun notarytool submit dist/ChatMock.dmg --apple-id "$APPLE_ID" --password "$APPLE_APP_SPECIFIC_PASSWORD" --team-id "$APPLE_TEAM_ID" --wait
+      - run: xcrun stapler staple dist/ChatMock.dmg
+      - run: xcrun stapler validate dist/ChatMock.dmg
+      - uses: actions/upload-artifact@v4
+        with:
+          name: macos-gui
+          path: dist/ChatMock.dmg
+
+  # Builds the image for linux/amd64 and linux/arm64 and pushes it to
+  # storagetime/chatmock, logging in with the DOCKERHUB_* secrets.
+  docker:
+    needs: validate
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      # Three tags per release: "latest", the git tag as pushed (e.g. v1.2.3),
+      # and the version with the leading "v" stripped by the validate job.
+      - id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: storagetime/chatmock
+          tags: |
+            type=raw,value=latest
+            type=raw,value=${{ needs.validate.outputs.tag }}
+            type=raw,value=${{ needs.validate.outputs.version }}
+      - uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+
+  # Regenerates chatmock.rb in the RayBytes/homebrew-chatmock tap so that it
+  # points at the source archive of the tag being released, then pushes it.
+  homebrew:
+    needs: validate
+    runs-on: ubuntu-latest
+    steps:
+      # The tap token is handed to the script through the environment instead
+      # of being expanded into the script text by the workflow engine.
+      - env:
+          HOMEBREW_TAP_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}
+        run: |
+          # The default shell is `bash -e` without pipefail, so a failed curl
+          # would otherwise yield the SHA-256 of empty input and a formula with
+          # a bogus checksum would be pushed.
+          set -euo pipefail
+          ARCHIVE_URL="https://github.com/${GITHUB_REPOSITORY}/archive/refs/tags/${GITHUB_REF_NAME}.tar.gz"
+          SHA256="$(curl -fsSL "$ARCHIVE_URL" | shasum -a 256 | awk '{print $1}')"
+          git clone "https://x-access-token:${HOMEBREW_TAP_TOKEN}@github.com/RayBytes/homebrew-chatmock.git" tap
+          cd tap
+          # Unquoted heredoc: the shell expands ${ARCHIVE_URL} and ${SHA256};
+          # Ruby's #{...} interpolation passes through untouched.
+          cat <<EOF > chatmock.rb
+          class Chatmock < Formula
+            include Language::Python::Virtualenv
+
+            desc "OpenAI & Ollama compatible API powered by your ChatGPT plan"
+            homepage "https://github.com/RayBytes/ChatMock"
+            url "${ARCHIVE_URL}"
+            sha256 "${SHA256}"
+            license "MIT"
+            head "https://github.com/RayBytes/ChatMock.git", branch: "main"
+
+            depends_on "python@3.11"
+
+            def install
+              virtualenv_create(libexec, "python3.11")
+              system libexec/"bin/pip", "install", "."
+              bin.install_symlink libexec/"bin/chatmock"
+            end
+
+            def caveats
+              <<~EOS
+                To get started with ChatMock:
+                  chatmock login
+                  chatmock serve
+              EOS
+            end
+
+            test do
+              output = shell_output("#{bin}/chatmock --help 2>&1")
+              assert_match "ChatMock", output
+            end
+          end
+          EOF
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add chatmock.rb
+          # A failed commit (for example, nothing changed) ends the step
+          # successfully without pushing.
+          git commit -m "chatmock ${GITHUB_REF_NAME}" || exit 0
+          git push
+
+  # Final job: waits for every build and publish job, downloads all uploaded
+  # artifacts, and hands the Python, Windows, and macOS files to the
+  # GitHub release action.
+  release-assets:
+    needs:
+      - validate
+      - build-python
+      - build-windows
+      - build-macos
+      - publish-pypi
+      - docker
+      - homebrew
+    runs-on: ubuntu-latest
+    steps:
+      # No artifact name is given, so each artifact is downloaded into its own
+      # sub-directory of release-artifacts/ (matching the globs below).
+      - uses: actions/download-artifact@v4
+        with:
+          path: release-artifacts
+      # Lists what was downloaded, for the job log.
+      - run: find release-artifacts -type f | sort
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: |
+            release-artifacts/python-dist/*
+            release-artifacts/windows-gui/*
+            release-artifacts/macos-gui/*
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..9da8bc0291d83f9cf9e4f8fa98964cb9fc5ff564
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,21 @@
+# Python bytecode
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Virtual environments
+.env/
+.venv/
+venv/
+
+# Packaging artifacts
+build/
+dist/
+*.egg-info/
+
+# Tool caches
+.pytest_cache/
+.mypy_cache/
+
+# OS clutter
+.DS_Store
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000000000000000000000000000000000000..6c4919f18b796d5945f1be99207f8231f611bdeb
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,37 @@
+# Contributing to ChatMock
+
+We welcome thoughtful improvements. This guide calls out the expectations that keep reviews quick and the project stable.
+
+# How should I contribute?
+
+### Before changing code...
+- Open an issue before large or risky efforts so scope is agreed up front.
+- Keep pull requests focused and easy to follow; break sweeping changes into a series when possible.
+- Treat documentation, code, and packaging (CLI, Docker, GUI) as a single surface (your updates should apply to all).
+
+### Getting Set Up
+- Review the Quickstart section in README.md
+- Read through the codebase and make sure you understand how it currently works.
+- Confirm you can log in and serve a local instance, then make a couple of sample requests to understand current behaviour so you know if it broke later on.
+
+### Working With Core Files
+- `prompt.md` and related Codex harness files are sensitive. Do not modify them or move entry points without prior maintainer approval.
+- Be cautious with parameter names, response payload shapes, and file locations consumed by downstream clients. Coordinate before changing them.
+- When touching shared logic, update both OpenAI and Ollama routes, plus any CLI/GUI code that depends on the same behaviour.
+
+## Designing Features and Fixes
+- Prefer opt-in flags or config switches for new capabilities, and leave defaults unchanged until maintainers confirm the rollout plan.
+- Document any limits or external dependencies introduced by your change.
+- Validate compatibility with popular clients (e.g. Jan, Raycast, custom OpenAI SDKs) when responses or streaming formats shift.
+
+# Pull Request Checklist
+- [ ] Rebased on the latest `main` and issue reference included when applicable.
+- [ ] Manual verification steps captured under "How to try locally" in the PR body.
+- [ ] README.md, DOCKER.md, and other docs updated—or explicitly noted as not required.
+- [ ] No generated artefacts or caches staged (`build/`, `dist/`, `__pycache__/`, `.pytest_cache/`, etc.).
+- [ ] Critical paths (`prompt.md`, routing modules, public parameter names) reviewed for unintended edits and discussed with maintainers if changes were necessary.
+
+## Need Help?
+- If you're not sure about scope, flags, or how to implement a certain feature, always create an issue beforehand.
+
+Thank you for your contribution!
diff --git a/DOCKER.md b/DOCKER.md
new file mode 100644
index 0000000000000000000000000000000000000000..c483d63026fb03cd56ea016be91b26bd57398832
--- /dev/null
+++ b/DOCKER.md
@@ -0,0 +1,41 @@
+# Docker Deployment
+
+## Quick Start
+1) Setup env:
+   cp .env.example .env
+
+2) Login:
+   docker compose run --rm --service-ports chatmock-login login
+
+   - The command prints an auth URL; copy and paste it into your browser.
+   - If your browser cannot reach the container's localhost callback, copy the full redirect URL from the browser address bar and paste it back into the terminal when prompted.
+   - Server should stop automatically once it receives the tokens and they are saved.
+
+3) Start the server:
+   docker compose up -d chatmock
+
+4) You're now free to use it in whichever chat app you like!
+
+## Configuration
+Set options in `.env` or pass environment variables:
+- `PORT`: Container listening port (default 8000)
+- `CHATMOCK_IMAGE`: image tag to run (default `storagetime/chatmock:latest`)
+- `VERBOSE`: `true|false` to enable request/stream logs
+- `CHATGPT_LOCAL_REASONING_EFFORT`: none|minimal|low|medium|high|xhigh
+- `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none
+- `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current
+- `CHATGPT_LOCAL_FAST_MODE`: `true|false` to enable fast mode by default for supported models
+- `CHATGPT_LOCAL_CLIENT_ID`: OAuth client id override (rarely needed)
+- `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS`: `true|false` to add reasoning model variants to `/v1/models`
+- `CHATGPT_LOCAL_ENABLE_WEB_SEARCH`: `true|false` to enable default web search tool
+
+## Logs
+Set `VERBOSE=true` to include extra logging for troubleshooting upstream or chat app requests. Please include and use these logs when submitting bug reports.
+
+## Test
+
+```
+curl -s http://localhost:8000/v1/chat/completions \
+   -H 'Content-Type: application/json' \
+   -d '{"model":"gpt-5-codex","messages":[{"role":"user","content":"Hello world!"}]}' | jq .
+```
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..10584ccab526651f8f77bea787280c81adc0abbb
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,20 @@
+# Image for running ChatMock: installs the package from source into a slim
+# Python 3.11 base and starts it through docker/entrypoint.sh.
+FROM python:3.11-slim
+
+# Do not write .pyc files, and keep stdout/stderr unbuffered so log lines
+# appear immediately in `docker logs`.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+
+WORKDIR /app
+
+# Copy only what the install needs: packaging metadata, the top-level script,
+# the prompt files, and the package itself.
+COPY pyproject.toml README.md chatmock.py prompt.md prompt_gpt5_codex.md /app/
+COPY chatmock /app/chatmock
+RUN pip install --no-cache-dir .
+
+# .env.example points CHATGPT_LOCAL_HOME (the auth dir) at /data.
+RUN mkdir -p /data
+
+COPY docker/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+
+# NOTE(review): .env.example and DOCKER.md document PORT=8000 as the default
+# listening port, while 7860 is declared here - confirm which port
+# entrypoint.sh actually binds. 1455 is presumably the login callback port;
+# verify against the entrypoint.
+EXPOSE 7860 1455
+
+# The default command is "serve"; arguments given to `docker run` replace it.
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["serve"]
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..53e93f4b7122b04e9cb95bb6e4436e64481098e6
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Game_Time
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index 5d3045474a9d3f65ee17e8a0a36105419f9d6314..d5c7aa4432cbe593874dfa84f8ed9cdc79a43157 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,209 @@
----
-title: CheckMat
-emoji: 💻
-colorFrom: indigo
-colorTo: indigo
-sdk: docker
-pinned: false
----
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+<div align="center">
+
+# ChatMock
+
+**Allows Codex to work in your favourite chat apps and coding tools.**
+
+[![PyPI](https://img.shields.io/pypi/v/chatmock?color=blue&label=pypi)](https://pypi.org/project/chatmock/)
+[![Python](https://img.shields.io/pypi/pyversions/chatmock)](https://pypi.org/project/chatmock/)
+[![License](https://img.shields.io/github/license/RayBytes/ChatMock)](LICENSE)
+[![Stars](https://img.shields.io/github/stars/RayBytes/ChatMock?style=flat)](https://github.com/RayBytes/ChatMock/stargazers)
+[![Last Commit](https://img.shields.io/github/last-commit/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/commits/main)
+[![Issues](https://img.shields.io/github/issues/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/issues)
+
+<br>
+
+
+</div>
+
+<br>
+
+## Install
+
+#### Homebrew
+```bash
+brew tap RayBytes/chatmock
+brew install chatmock
+```
+
+#### pipx / pip
+```bash
+pipx install chatmock
+```
+
+#### GUI
+Download from [releases](https://github.com/RayBytes/ChatMock/releases) (macOS & Windows)
+
+#### Docker
+See [DOCKER.md](DOCKER.md)
+
+#### Hugging Face
+See [Hugging Face Deployment](#hugging-face-deployment)
+
+<br>
+
+## Getting Started
+
+```bash
+# 1. Sign in with your ChatGPT account
+chatmock login
+
+# 2. Start the server
+chatmock serve
+```
+
+The server runs at `http://127.0.0.1:8000` by default. Use `http://127.0.0.1:8000/v1` as your base URL for OpenAI-compatible apps.
+
+<br>
+
+## Usage
+
+<details open>
+<summary><b>Python</b></summary>
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://127.0.0.1:8000/v1",
+    api_key="anything"  # not checked
+)
+
+response = client.chat.completions.create(
+    model="gpt-5.4",
+    messages=[{"role": "user", "content": "hello"}]
+)
+print(response.choices[0].message.content)
+```
+
+</details>
+
+<details>
+<summary><b>cURL</b></summary>
+
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5.4",
+    "messages": [{"role": "user", "content": "hello"}]
+  }'
+```
+
+</details>
+
+<details>
+<summary><b>Custom API (Plain Text)</b></summary>
+
+```bash
+# Request format: {"prompt": "..."}
+# Response format: {"status": "success", "text": "..."}
+
+curl http://127.0.0.1:8000/api \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "hello"}'
+
+# You can also specify the model in the URL
+curl http://127.0.0.1:8000/gpt-5.5/api \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "hello"}'
+```
+
+</details>
+
+<br>
+
+## Supported Models
+
+- `gpt-5.5`
+- `gpt-5.4`
+- `gpt-5.4-mini`
+- `gpt-5.2`
+- `gpt-5.1`
+- `gpt-5`
+- `gpt-5.3-codex`
+- `gpt-5.3-codex-spark`
+- `gpt-5.2-codex`
+- `gpt-5-codex`
+- `gpt-5.1-codex`
+- `gpt-5.1-codex-max`
+- `gpt-5.1-codex-mini`
+- `codex-mini`
+
+<br>
+
+## Features
+
+- Tool / function calling
+- Vision / image input
+- Thinking summaries (via think tags)
+- Configurable thinking effort
+- Fast mode for supported models
+- Web search tool
+- OpenAI-compatible `/v1/responses` (HTTP + WebSocket)
+- Ollama-compatible endpoints
+- Reasoning effort exposed as separate models (optional)
+
+<br>
+
+## Configuration
+
+All flags go after `chatmock serve`. These can also be set as environment variables.
+
+| Flag | Env var | Options | Default | Description |
+|------|---------|---------|---------|-------------|
+| `--reasoning-effort` | `CHATGPT_LOCAL_REASONING_EFFORT` | none, minimal, low, medium, high, xhigh | medium | How hard the model thinks |
+| `--reasoning-summary` | `CHATGPT_LOCAL_REASONING_SUMMARY` | auto, concise, detailed, none | auto | Thinking summary verbosity |
+| `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags, current | think-tags | How reasoning is returned to the client |
+| `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
+| `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
+| `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
+
+<details>
+<summary><b>Web search in a request</b></summary>
+
+```json
+{
+  "model": "gpt-5.4",
+  "messages": [{"role": "user", "content": "latest news on ..."}],
+  "responses_tools": [{"type": "web_search"}],
+  "responses_tool_choice": "auto"
+}
+```
+
+</details>
+
+<details>
+<summary><b>Fast mode in a request</b></summary>
+
+```json
+{
+  "model": "gpt-5.4",
+  "input": "summarize this",
+  "fast_mode": true
+}
+```
+
+</details>
+
+<br>
+
+## Notes
+
+Use responsibly and at your own risk. This project is not affiliated with OpenAI.
+
+<br>
+
+## Hugging Face Deployment
+
+1. **Get Auth**: Run `python chatmock.py info --json` locally and copy the output.
+2. **Create Space**: Create a new **Docker** Space on Hugging Face.
+3. **Upload**: Upload all project files to the Space.
+4. **Secret**: In Space Settings, add a secret named `AUTH_JSON` and paste your auth data as the value.
+5. **Done**: Your API will be available at `https://<user>-<space>.hf.space/api`
+
+<br>
+
+## Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=RayBytes/ChatMock&type=Timeline)](https://www.star-history.com/#RayBytes/ChatMock&Timeline)
diff --git a/build.py b/build.py
new file mode 100644
index 0000000000000000000000000000000000000000..e85ee54385eecb5bdb39f7849fbecb0c48a6506a
--- /dev/null
+++ b/build.py
@@ -0,0 +1,225 @@
+from __future__ import annotations
+
+import argparse
+import os
+import platform
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+import plistlib
+from PIL import Image
+
+
+ROOT = Path(__file__).parent.resolve()
+BUILD_DIR = ROOT / "build"
+ICONS_DIR = BUILD_DIR / "icons"
+
+
+def info(msg: str) -> None:
+    print(f"[build] {msg}")
+
+
+def ensure_dirs() -> None:
+    ICONS_DIR.mkdir(parents=True, exist_ok=True)
+
+
+def load_icon_png(path: Path) -> Image.Image:
+    if Image is None:
+        raise RuntimeError("Pillow is required to process icons.")
+    img = Image.open(path).convert("RGBA")
+    size = max(img.width, img.height)
+    canvas = Image.new("RGBA", (size, size), (0, 0, 0, 0))
+    x = (size - img.width) // 2
+    y = (size - img.height) // 2
+    canvas.paste(img, (x, y))
+    return canvas
+
+
+def rounded(img: Image.Image, radius_ratio: float = 0.22) -> Image.Image:
+    if Image is None:
+        return img
+    w, h = img.size
+    r = int(min(w, h) * max(0.0, min(radius_ratio, 0.5)))
+    if r <= 0:
+        return img
+    mask = Image.new("L", (w, h), 0)
+    from PIL import ImageDraw
+    d = ImageDraw.Draw(mask)
+    d.rounded_rectangle((0, 0, w, h), radius=r, fill=255)
+    out = img.copy()
+    out.putalpha(mask)
+    return out
+
+
+def make_windows_ico(src_png: Path, out_ico: Path, radius_ratio: float) -> Path:
+    info("Generating Windows .ico")
+    square = load_icon_png(src_png)
+    sizes = [16, 24, 32, 48, 64, 128, 256]
+    images = [rounded(square.resize((s, s), Image.LANCZOS), radius_ratio) for s in sizes]
+    images[0].save(out_ico, format="ICO", sizes=[(s, s) for s in sizes])
+    return out_ico
+
+
+def make_macos_icns(src_png: Path, out_icns: Path, radius_ratio: float) -> Path:
+    info("Generating macOS .icns")
+    iconset = BUILD_DIR / "icon.iconset"
+    if iconset.exists():
+        shutil.rmtree(iconset)
+    iconset.mkdir(parents=True, exist_ok=True)
+
+    square = load_icon_png(src_png)
+    sizes = [16, 32, 64, 128, 256, 512, 1024]
+    mapping = {
+        16:  ["icon_16x16.png", "icon_32x32.png"],
+        32:  ["icon_16x16@2x.png"],
+        64:  ["icon_32x32@2x.png"],
+        128: ["icon_128x128.png", "icon_256x256.png"],
+        256: ["icon_128x128@2x.png"],
+        512: ["icon_512x512.png"],
+        1024:["icon_512x512@2x.png"],
+    }
+    for s in sizes:
+        img = rounded(square.resize((s, s), Image.LANCZOS), radius_ratio)
+        for name in mapping.get(s, []):
+            img.save(iconset / name, format="PNG")
+
+    try:
+        subprocess.run(["iconutil", "-c", "icns", str(iconset), "-o", str(out_icns)], check=True)
+    except Exception as e:
+        raise RuntimeError("Failed to create .icns. Ensure Xcode command line tools are installed (iconutil).\n"
+                           f"Details: {e}")
+    finally:
+        shutil.rmtree(iconset, ignore_errors=True)
+    return out_icns
+
+
+def pyinstaller_add_data_arg(src: Path, dest: str) -> str:
+    sep = ";" if os.name == "nt" else ":"
+    return f"{src}{sep}{dest}"
+
+
+def run_pyinstaller(entry: Path, name: str, icon: Path | None, extra_data: list[tuple[Path, str]], bundle_id: str | None = None) -> None:
+    cmd = [
+        sys.executable, "-m", "PyInstaller",
+        "--windowed", "--noconfirm",
+        "--name", name,
+    ]
+    if bundle_id and platform.system().lower() == "darwin":
+        cmd += ["--osx-bundle-identifier", bundle_id]
+    if icon is not None:
+        cmd += ["--icon", str(icon)]
+    for (src, dest) in extra_data:
+        cmd += ["--add-data", pyinstaller_add_data_arg(src, dest)]
+    cmd.append(str(entry))
+    info("Running: " + " ".join(cmd))
+    subprocess.run(cmd, check=True)
+
+
+def patch_macos_plist(app_path: Path, bundle_id: str, icon_base_name: str = "appicon") -> None:
+    info("Patching macOS Info.plist")
+    plist_path = app_path / "Contents" / "Info.plist"
+    if not plist_path.exists():
+        info(f"No Info.plist at {plist_path}, skipping patch")
+        return
+    with plist_path.open("rb") as f:
+        data = plistlib.load(f)
+    data["CFBundleIdentifier"] = bundle_id
+    data["CFBundleName"] = data.get("CFBundleName") or app_path.stem
+    data["CFBundleDisplayName"] = data.get("CFBundleDisplayName") or app_path.stem
+    data["CFBundleIconFile"] = icon_base_name
+    data["CFBundleIconName"] = icon_base_name
+    with plist_path.open("wb") as f:
+        plistlib.dump(data, f)
+
+def make_dmg(app_path: Path, dmg_path: Path, volume_name: str) -> None:
+    info("Creating DMG")
+    staging = BUILD_DIR / "dmg_staging"
+    if staging.exists():
+        shutil.rmtree(staging)
+    (staging).mkdir(parents=True, exist_ok=True)
+    shutil.rmtree(staging / app_path.name, ignore_errors=True)
+    shutil.copytree(app_path, staging / app_path.name, symlinks=True)
+    try:
+        os.symlink("/Applications", staging / "Applications")
+    except FileExistsError:
+        pass
+    dmg_path.parent.mkdir(parents=True, exist_ok=True)
+    subprocess.run([
+        "hdiutil", "create", "-volname", volume_name,
+        "-srcfolder", str(staging),
+        "-format", "UDZO",
+        "-imagekey", "zlib-level=9",
+        str(dmg_path)
+    ], check=True)
+    shutil.rmtree(staging, ignore_errors=True)
+
+
+def main() -> None:
+    """Command-line entry point: build the GUI app for the current platform.
+
+    Generates a platform-specific icon, runs PyInstaller on the entry script,
+    and on macOS patches Info.plist and optionally creates a DMG. With
+    ``--dmg-only`` it only packages an existing ``dist/<name>.app``.
+
+    Raises:
+        SystemExit: If the app (for ``--dmg-only``), the entry script, or the
+            icon PNG cannot be found.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--name", default="ChatMock")
+    parser.add_argument("--entry", default="gui.py")
+    parser.add_argument("--icon", default="icon.png")
+    parser.add_argument("--radius", type=float, default=0.22)
+    parser.add_argument("--square", action="store_true")
+    parser.add_argument("--dmg", action="store_true")
+    parser.add_argument("--dmg-only", action="store_true")
+    args = parser.parse_args()
+
+    ensure_dirs()
+    # --entry and --icon are resolved relative to this script's directory.
+    entry = ROOT / args.entry
+    icon_src = ROOT / args.icon
+    # --dmg-only skips the build entirely and packages the existing bundle,
+    # so the entry/icon checks below do not apply to it.
+    if args.dmg_only:
+        app_path = ROOT / "dist" / f"{args.name}.app"
+        if not app_path.exists():
+            raise SystemExit(f"App not found: {app_path}")
+        dmg = ROOT / "dist" / f"{args.name}.dmg"
+        make_dmg(app_path, dmg, args.name)
+        return
+    if not entry.exists():
+        raise SystemExit(f"Entry not found: {entry}")
+    if not icon_src.exists():
+        raise SystemExit(f"Icon PNG not found: {icon_src}")
+
+    os_name = platform.system().lower()
+    # Files bundled next to the executable; the icon is appended below.
+    extra_data: list[tuple[Path, str]] = [
+        (ROOT / "prompt.md", "."),
+        (ROOT / "prompt_gpt5_codex.md", "."),
+    ]
+
+    bundle_icon: Path | None = None
+    # --square disables corner rounding by forcing the radius ratio to zero.
+    rr = 0.0 if args.square else float(args.radius)
+    if os_name == "windows":
+        ico = ICONS_DIR / "appicon.ico"
+        make_windows_ico(icon_src, ico, rr)
+        bundle_icon = ico
+        extra_data.append((ico, "."))
+    elif os_name == "darwin":
+        icns = ICONS_DIR / "appicon.icns"
+        make_macos_icns(icon_src, icns, rr)
+        bundle_icon = icns
+        extra_data.append((icns, "."))
+    else:
+        # Other platforms: ship a 512x512 PNG as data only; no --icon is
+        # passed to PyInstaller (bundle_icon stays None).
+        png_copy = ICONS_DIR / "appicon.png"
+        # NOTE(review): Image is imported unconditionally at module level, so
+        # the else branch below looks unreachable - confirm before relying on it.
+        if Image is not None:
+            square = load_icon_png(icon_src).resize((512, 512), Image.LANCZOS)
+            square = rounded(square, rr) if rr > 0 else square
+            square.save(png_copy)
+        else:
+            shutil.copy2(icon_src, png_copy)
+        extra_data.append((png_copy, "."))
+
+    run_pyinstaller(entry, args.name, bundle_icon, extra_data)
+    if os_name == "darwin":
+        app_path = ROOT / "dist" / f"{args.name}.app"
+        if app_path.exists():
+            # The bundle identifier is applied by patching the plist after the
+            # build; it is not passed to run_pyinstaller above.
+            bid = "com.chatmock.app"
+            patch_macos_plist(app_path, bundle_id=bid, icon_base_name="appicon")
+            if args.dmg:
+                dmg = ROOT / "dist" / f"{args.name}.dmg"
+                make_dmg(app_path, dmg, args.name)
+
+
+
+if __name__ == "__main__":
+    main()
diff --git a/build/lib/chatmock/__init__.py b/build/lib/chatmock/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ae049287c6b4e8e09b809d4c5cda847c9f64cc2
--- /dev/null
+++ b/build/lib/chatmock/__init__.py
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+from .app import create_app
+from .cli import main
+from .version import __version__
diff --git a/build/lib/chatmock/app.py b/build/lib/chatmock/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..e4541dc4028013cba5cca6dd086687a5f01e506d
--- /dev/null
+++ b/build/lib/chatmock/app.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from flask import Flask, jsonify
+from flask_sock import Sock
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .http import build_cors_headers
+from .routes_openai import openai_bp
+from .routes_ollama import ollama_bp
+from .websocket_routes import register_websocket_routes
+
+
+def create_app(
+    verbose: bool = False,
+    verbose_obfuscation: bool = False,
+    reasoning_effort: str = "medium",
+    reasoning_summary: str = "auto",
+    reasoning_compat: str = "think-tags",
+    fast_mode: bool = False,
+    debug_model: str | None = None,
+    expose_reasoning_models: bool = False,
+    default_web_search: bool = False,
+) -> Flask:
+    """Build the Flask app: store the options in ``app.config`` and register all routes."""
+    app = Flask(__name__)
+    app.config.update({
+        "VERBOSE": bool(verbose),
+        "VERBOSE_OBFUSCATION": bool(verbose_obfuscation),
+        "REASONING_EFFORT": reasoning_effort,
+        "REASONING_SUMMARY": reasoning_summary,
+        "REASONING_COMPAT": reasoning_compat,
+        "FAST_MODE": bool(fast_mode),
+        "DEBUG_MODEL": debug_model,
+        "BASE_INSTRUCTIONS": BASE_INSTRUCTIONS,
+        "GPT5_CODEX_INSTRUCTIONS": GPT5_CODEX_INSTRUCTIONS,
+        "EXPOSE_REASONING_MODELS": bool(expose_reasoning_models),
+        "DEFAULT_WEB_SEARCH": bool(default_web_search),
+    })
+
+    @app.get("/")
+    @app.get("/health")
+    def health():
+        return jsonify(status="ok")
+
+    @app.after_request
+    def _cors(resp):
+        for header, value in build_cors_headers().items():
+            resp.headers.setdefault(header, value)
+        return resp
+
+    # HTTP blueprints first, then the WebSocket routes on a Sock bound to this app.
+    for blueprint in (openai_bp, ollama_bp):
+        app.register_blueprint(blueprint)
+    register_websocket_routes(Sock(app))
+
+    return app
diff --git a/build/lib/chatmock/cli.py b/build/lib/chatmock/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..8482cf38b201f61b2c1adf55107c3906139bc20e
--- /dev/null
+++ b/build/lib/chatmock/cli.py
@@ -0,0 +1,425 @@
+from __future__ import annotations
+
+import errno
+import argparse
+import json
+import os
+import sys
+import webbrowser
+from datetime import datetime
+
+from .app import create_app
+from .config import CLIENT_ID_DEFAULT
+from .limits import RateLimitWindow, compute_reset_at, load_rate_limit_snapshot
+from .oauth import OAuthHTTPServer, OAuthHandler, REQUIRED_PORT, URL_BASE
+from .utils import eprint, get_home_dir, load_chatgpt_tokens, parse_jwt_claims, read_auth_file
+
+
+_STATUS_LIMIT_BAR_SEGMENTS = 30  # total number of cells in a rendered bar
+_STATUS_LIMIT_BAR_FILLED = "█"  # cell that is fully used
+_STATUS_LIMIT_BAR_EMPTY = "░"  # cell that is unused
+_STATUS_LIMIT_BAR_PARTIAL = "▓"  # last used cell when it is only partly used
+
+
+def _clamp_percent(value: float) -> float:
+    """Coerce ``value`` to a float percentage inside the closed range [0, 100].
+
+    Unparseable input, NaN and negatives give 0.0; values above 100 give 100.0.
+    """
+    try:
+        number = float(value)
+    except Exception:
+        return 0.0
+    if number != number or number < 0.0:  # NaN is the only value unequal to itself
+        return 0.0
+    return 100.0 if number > 100.0 else number
+
+
+def _render_progress_bar(percent_used: float) -> str:
+    """Render ``percent_used`` as a ``[...]`` bar of _STATUS_LIMIT_BAR_SEGMENTS cells.
+
+    A fractional cell above one half is drawn with the partial glyph; smaller
+    fractions are dropped.
+    """
+    fraction = max(0.0, min(1.0, percent_used / 100.0))
+    cells_exact = fraction * _STATUS_LIMIT_BAR_SEGMENTS
+    whole = int(cells_exact)
+    show_partial = (cells_exact - whole) > 0.5
+    # The partial glyph occupies one extra cell, so count it before clamping.
+    used = whole + 1 if show_partial else whole
+    used = max(0, min(_STATUS_LIMIT_BAR_SEGMENTS, used))
+    blanks = _STATUS_LIMIT_BAR_EMPTY * (_STATUS_LIMIT_BAR_SEGMENTS - used)
+    if show_partial and used > 0:
+        solid = _STATUS_LIMIT_BAR_FILLED * (used - 1) + _STATUS_LIMIT_BAR_PARTIAL
+    else:
+        solid = _STATUS_LIMIT_BAR_FILLED * used
+    return f"[{solid}{blanks}]"
+
+
+def _get_usage_color(percent_used: float) -> str:
+    """Return the ANSI color escape for a usage level (thresholds at 90, 75 and 50 percent)."""
+    if percent_used >= 90:
+        return "\033[91m"
+    if percent_used >= 75:
+        return "\033[93m"
+    if percent_used >= 50:
+        return "\033[94m"
+    return "\033[92m"
+
+
+def _reset_color() -> str:
+    """Return the ANSI escape sequence that resets text color and attributes."""
+    return "\033[0m"
+
+
+def _format_window_duration(minutes: int | None) -> str | None:
+    """Spell out a window length given in minutes, e.g. ``"1 week 2 days 3 hours"``.
+
+    Returns None when ``minutes`` is None, not convertible to int, or not positive.
+    """
+    if minutes is None:
+        return None
+    try:
+        total = int(minutes)
+    except Exception:
+        return None
+    if total <= 0:
+        return None
+    units = (
+        ("week", 7 * 24 * 60),
+        ("day", 24 * 60),
+        ("hour", 60),
+        ("minute", 1),
+    )
+    pieces = []
+    for unit, size in units:
+        count, total = divmod(total, size)
+        if count:
+            pieces.append(f"{count} {unit}" + ("s" if count != 1 else ""))
+    return " ".join(pieces)
+
+
+def _format_reset_duration(seconds: int | None) -> str | None:
+    """Abbreviate a countdown given in seconds, e.g. ``"1d 2h 3m"``.
+
+    Negative values count as zero. A non-zero remainder of less than a minute
+    is shown as ``"under 1m"`` and exactly zero as ``"0m"``. None or input that
+    cannot be converted to int gives None.
+    """
+    if seconds is None:
+        return None
+    try:
+        left = int(seconds)
+    except Exception:
+        return None
+    left = max(left, 0)  # an already-elapsed reset is reported as zero
+    chunks: list[str] = []
+    # Peel off days, hours and minutes; whatever remains is leftover seconds.
+    for suffix, size in (("d", 86400), ("h", 3600), ("m", 60)):
+        amount, left = divmod(left, size)
+        if amount:
+            chunks.append(f"{amount}{suffix}")
+    # Nothing of a minute or more: distinguish "a few seconds" from "none".
+    if not chunks:
+        chunks.append("under 1m" if left else "0m")
+    return " ".join(chunks)
+
+
+def _format_local_datetime(dt: datetime) -> str:
+    """Format ``dt`` in the system local timezone as ``Mon DD, YYYY HH:MM TZ``."""
+    local = dt.astimezone()
+    return f"{local:%b %d, %Y %H:%M} {local.tzname() or 'local'}"
+
+
+def _print_usage_limits_block() -> None:
+    """Print the stored rate-limit snapshot as colored progress bars on stdout.
+
+    The "Resets in" countdown is measured from the current time, not from when
+    the snapshot was captured, so an old snapshot never shows a stale countdown.
+    """
+    stored = load_rate_limit_snapshot()
+
+    print("📊 Usage Limits")
+    if stored is None:
+        print("  No usage data available yet. Send a request through ChatMock first.")
+        print()
+        return
+
+    update_time = _format_local_datetime(stored.captured_at)
+    print(f"Last updated: {update_time}")
+    print()
+
+    windows: list[tuple[str, str, RateLimitWindow]] = []
+    if stored.snapshot.primary is not None:
+        windows.append(("⚡", "5 hour limit", stored.snapshot.primary))
+    if stored.snapshot.secondary is not None:
+        windows.append(("📅", "Weekly limit", stored.snapshot.secondary))
+    if not windows:
+        print("  Usage data was captured but no limit windows were provided.")
+        print()
+        return
+
+    for i, (icon_label, desc, window) in enumerate(windows):
+        if i > 0:
+            print()
+        percent_used = _clamp_percent(window.used_percent)
+        remaining = max(0.0, 100.0 - percent_used)
+        color = _get_usage_color(percent_used)
+        reset = _reset_color()
+        progress = _render_progress_bar(percent_used)
+        usage_text = f"{percent_used:5.1f}% used"
+        remaining_text = f"{remaining:5.1f}% left"
+        print(f"{icon_label} {desc}")
+        print(f"{color}{progress}{reset} {color}{usage_text}{reset} | {remaining_text}")
+
+        reset_at = compute_reset_at(stored.captured_at, window)
+        if reset_at is not None:
+            # Count down from "now"; captured_at may be hours or days old.
+            seconds_left = (reset_at - datetime.now(tz=reset_at.tzinfo)).total_seconds()
+            reset_in = _format_reset_duration(int(seconds_left))
+            print(f"    ⏳ Resets in: {reset_in} at {_format_local_datetime(reset_at)}")
+        else:
+            # No absolute reset time could be computed; fall back to the raw value.
+            reset_in = _format_reset_duration(window.resets_in_seconds)
+            if reset_in:
+                print(f"    ⏳ Resets in: {reset_in}")
+
+    print()
+
+def cmd_login(no_browser: bool, verbose: bool) -> int:
+    """Run the OAuth login flow and return a process exit code.
+
+    Serves the local OAuth callback and also accepts the redirect URL pasted on
+    stdin. Returns 1 on config/bind errors and 13 when the port is already in use.
+    """
+    home_dir = get_home_dir()
+    client_id = CLIENT_ID_DEFAULT
+    if not client_id:
+        eprint("ERROR: No OAuth client id configured. Set CHATGPT_LOCAL_CLIENT_ID.")
+        return 1
+
+    try:
+        bind_host = os.getenv("CHATGPT_LOCAL_LOGIN_BIND", "127.0.0.1")
+        httpd = OAuthHTTPServer((bind_host, REQUIRED_PORT), OAuthHandler, home_dir=home_dir, client_id=client_id, verbose=verbose)
+    except OSError as e:
+        eprint(f"ERROR: {e}")
+        return 13 if e.errno == errno.EADDRINUSE else 1
+
+    auth_url = httpd.auth_url()
+    with httpd:
+        eprint(f"Starting local login server on {URL_BASE}")
+        if not no_browser:
+            try:
+                webbrowser.open(auth_url, new=1, autoraise=True)
+            except Exception as e:
+                eprint(f"Failed to open browser: {e}")
+        eprint(f"If your browser did not open, navigate to:\n{auth_url}")
+
+        def _stdin_paste_worker() -> None:
+            try:
+                eprint(
+                    "If the browser can't reach this machine, paste the full redirect URL here and press Enter (or leave blank to keep waiting):"
+                )
+                line = sys.stdin.readline().strip()
+                if not line:
+                    return
+                try:
+                    from urllib.parse import urlparse, parse_qs
+                    params = parse_qs(urlparse(line).query)
+                    code = (params.get("code") or [None])[0]
+                    state = (params.get("state") or [None])[0]
+                    if not code:
+                        eprint("Input did not contain an auth code. Ignoring.")
+                        return
+                    if state != httpd.state:  # a missing state must fail the CSRF check too
+                        eprint("State mismatch. Ignoring pasted URL for safety.")
+                        return
+                    eprint("Received redirect URL. Completing login without callback…")
+                    bundle, _ = httpd.exchange_code(code)
+                    if httpd.persist_auth(bundle):
+                        httpd.exit_code = 0
+                        eprint("Login successful. Tokens saved.")
+                    else:
+                        eprint("ERROR: Unable to persist auth file.")
+                    httpd.shutdown()
+                except Exception as exc:
+                    eprint(f"Failed to process pasted redirect URL: {exc}")
+            except Exception:
+                pass
+
+        try:
+            import threading
+            threading.Thread(target=_stdin_paste_worker, daemon=True).start()
+        except Exception:
+            pass
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            eprint("\nKeyboard interrupt received, exiting.")
+        return httpd.exit_code
+
+
+def cmd_serve(
+    host: str,
+    port: int,
+    verbose: bool,
+    verbose_obfuscation: bool,
+    reasoning_effort: str,
+    reasoning_summary: str,
+    reasoning_compat: str,
+    fast_mode: bool,
+    debug_model: str | None,
+    expose_reasoning_models: bool,
+    default_web_search: bool,
+) -> int:
+    """Create the app from the given options, serve it on ``host:port``, and return 0 afterwards."""
+    flask_app = create_app(
+        default_web_search=default_web_search,
+        expose_reasoning_models=expose_reasoning_models,
+        debug_model=debug_model,
+        fast_mode=fast_mode,
+        reasoning_compat=reasoning_compat,
+        reasoning_summary=reasoning_summary,
+        reasoning_effort=reasoning_effort,
+        verbose_obfuscation=verbose_obfuscation,
+        verbose=verbose,
+    )
+    flask_app.run(host=host, port=port, threaded=True, use_reloader=False)
+    return 0
+
+
+def main() -> None:
+    """Parse the command line and run the ``login``, ``serve`` or ``info`` command, then exit."""
+    parser = argparse.ArgumentParser(description="ChatMock: login & OpenAI-compatible proxy")
+    sub = parser.add_subparsers(dest="command", required=True)
+
+    p_login = sub.add_parser("login", help="Authorize with ChatGPT and store tokens")
+    p_login.add_argument("--no-browser", action="store_true", help="Do not open the browser automatically")
+    p_login.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+
+    p_serve = sub.add_parser("serve", help="Run local OpenAI-compatible server")
+    p_serve.add_argument("--host", default="127.0.0.1")
+    p_serve.add_argument("--port", type=int, default=8000)
+    p_serve.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+    p_serve.add_argument(
+        "--verbose-obfuscation",
+        action="store_true",
+        help="Also dump raw SSE/obfuscation events (in addition to --verbose request/response logs).",
+    )
+    p_serve.add_argument(
+        "--debug-model",
+        dest="debug_model",
+        default=os.getenv("CHATGPT_LOCAL_DEBUG_MODEL"),
+        help="Forcibly override requested 'model' with this value",
+    )
+    p_serve.add_argument(
+        "--fast-mode",
+        action=argparse.BooleanOptionalAction,
+        default=(os.getenv("CHATGPT_LOCAL_FAST_MODE") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help="Enable GPT fast mode by default for supported models; request-level overrides still take precedence.",
+    )
+    p_serve.add_argument(
+        "--reasoning-effort",
+        choices=["none", "minimal", "low", "medium", "high", "xhigh"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(),
+        help="Reasoning effort level for Responses API (default: medium)",
+    )
+    p_serve.add_argument(
+        "--reasoning-summary",
+        choices=["auto", "concise", "detailed", "none"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_SUMMARY", "auto").lower(),
+        help="Reasoning summary verbosity (default: auto)",
+    )
+    p_serve.add_argument(
+        "--reasoning-compat",
+        choices=["legacy", "o3", "think-tags", "current"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_COMPAT", "think-tags").lower(),
+        help=(
+            "Compatibility mode for exposing reasoning to clients (legacy|o3|think-tags). "
+            "'current' is accepted as an alias for 'legacy'"
+        ),
+    )
+    p_serve.add_argument(
+        "--expose-reasoning-models",
+        # BooleanOptionalAction adds --no-expose-reasoning-models, so the CLI can override the env default.
+        action=argparse.BooleanOptionalAction,
+        default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Expose GPT-5 family reasoning effort variants (none|minimal|low|medium|high|xhigh where supported) "
+            "as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
+        ),
+    )
+    p_serve.add_argument(
+        "--enable-web-search",
+        action=argparse.BooleanOptionalAction,
+        default=(os.getenv("CHATGPT_LOCAL_ENABLE_WEB_SEARCH") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Enable default web_search tool when a request omits responses_tools (off by default). "
+            "Also configurable via CHATGPT_LOCAL_ENABLE_WEB_SEARCH."
+        ),
+    )
+
+    p_info = sub.add_parser("info", help="Print current stored tokens and derived account id")
+    p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents")
+
+    args = parser.parse_args()
+
+    if args.command == "login":
+        sys.exit(cmd_login(no_browser=args.no_browser, verbose=args.verbose))
+    elif args.command == "serve":
+        sys.exit(
+            cmd_serve(
+                host=args.host,
+                port=args.port,
+                verbose=args.verbose,
+                verbose_obfuscation=args.verbose_obfuscation,
+                reasoning_effort=args.reasoning_effort,
+                reasoning_summary=args.reasoning_summary,
+                reasoning_compat=args.reasoning_compat,
+                fast_mode=args.fast_mode,
+                debug_model=args.debug_model,
+                expose_reasoning_models=args.expose_reasoning_models,
+                default_web_search=args.enable_web_search,
+            )
+        )
+    elif args.command == "info":
+        auth = read_auth_file()
+        if getattr(args, "json", False):
+            print(json.dumps(auth or {}, indent=2))
+            sys.exit(0)
+        access_token, account_id, id_token = load_chatgpt_tokens()
+        if not access_token or not id_token:
+            print("👤 Account")
+            print("  • Not signed in")
+            print("  • Run: python3 chatmock.py login")
+            print("")
+            _print_usage_limits_block()
+            sys.exit(0)
+
+        id_claims = parse_jwt_claims(id_token) or {}
+        access_claims = parse_jwt_claims(access_token) or {}
+        email = id_claims.get("email") or id_claims.get("preferred_username") or "<unknown>"
+        plan_raw = (access_claims.get("https://api.openai.com/auth") or {}).get("chatgpt_plan_type") or "unknown"
+        plan_map = {
+            "plus": "Plus",
+            "pro": "Pro",
+            "free": "Free",
+            "team": "Team",
+            "enterprise": "Enterprise",
+        }
+        plan = plan_map.get(str(plan_raw).lower(), str(plan_raw).title() if isinstance(plan_raw, str) else "Unknown")
+        print("👤 Account")
+        print("  • Signed in with ChatGPT")
+        print(f"  • Login: {email}")
+        print(f"  • Plan: {plan}")
+        if account_id:
+            print(f"  • Account ID: {account_id}")
+        print("")
+        _print_usage_limits_block()
+        sys.exit(0)
+    else:
+        parser.error("Unknown command")
+
+
+if __name__ == "__main__":  # only when executed as a script, not when imported
+    main()
diff --git a/build/lib/chatmock/config.py b/build/lib/chatmock/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc5ca817b47d23d86c2d32e2e5492fbe11ce9463
--- /dev/null
+++ b/build/lib/chatmock/config.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+
+
+CLIENT_ID_DEFAULT = os.getenv("CHATGPT_LOCAL_CLIENT_ID") or "app_EMoamEEZ73f0CkXaXp7hrann"  # unset or empty env var -> built-in id
+OAUTH_ISSUER_DEFAULT = os.getenv("CHATGPT_LOCAL_ISSUER") or "https://auth.openai.com"  # unset or empty env var -> default issuer
+OAUTH_TOKEN_URL = f"{OAUTH_ISSUER_DEFAULT}/oauth/token"
+
+CHATGPT_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses"
+
+
+def _read_prompt_text(filename: str) -> str | None:
+    """Return the first non-blank text of ``filename`` found in the known locations, else None."""
+    here = Path(__file__).parent
+    bundle_dir = getattr(sys, "_MEIPASS", None)
+    # Search order: directory above the package, the package itself, sys._MEIPASS (if set), CWD.
+    search_dirs = [here.parent, here, Path(bundle_dir) if bundle_dir else None, Path.cwd()]
+    for directory in search_dirs:
+        if directory is None:
+            continue
+        path = directory / filename
+        try:
+            if path.exists():
+                text = path.read_text(encoding="utf-8")
+                if isinstance(text, str) and text.strip():
+                    return text
+        except Exception:
+            continue
+    return None
+
+
+def read_base_instructions() -> str:
+    """Return the text of ``prompt.md``; raise FileNotFoundError if no non-blank copy is found."""
+    if (text := _read_prompt_text("prompt.md")) is None:
+        raise FileNotFoundError("Failed to read prompt.md; expected adjacent to package or CWD.")
+    return text
+
+
+def read_gpt5_codex_instructions(fallback: str) -> str:
+    """Return the text of ``prompt_gpt5_codex.md``, or ``fallback`` when it is missing or blank."""
+    return _read_prompt_text("prompt_gpt5_codex.md") or fallback
+
+
+BASE_INSTRUCTIONS = read_base_instructions()  # runs at import time; raises FileNotFoundError without prompt.md
+GPT5_CODEX_INSTRUCTIONS = read_gpt5_codex_instructions(BASE_INSTRUCTIONS)  # falls back to the base prompt
diff --git a/build/lib/chatmock/fast_mode.py b/build/lib/chatmock/fast_mode.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dbb557c41fcc23b83e6cc4c42284122b6b90d87
--- /dev/null
+++ b/build/lib/chatmock/fast_mode.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from .model_registry import normalize_model_name
+
+
+PRIORITY_SUPPORTED_MODELS = frozenset(  # normalized model names for which the "priority" tier is allowed
+    (
+        "gpt-5.4",
+        "gpt-5.2",
+        "gpt-5.1",
+        "gpt-5",
+        "gpt-5.1-codex",
+        "gpt-5-codex",
+    )
+)
+
+_TRUE_STRINGS = {"1", "true", "yes", "on"}  # matched by parse_optional_bool after strip() + lower()
+_FALSE_STRINGS = {"0", "false", "no", "off"}  # matched by parse_optional_bool after strip() + lower()
+
+
+def parse_optional_bool(value: Any) -> bool | None:
+    """Read ``value`` as a tri-state flag: True, False, or None when it is not a recognized boolean."""
+    if isinstance(value, bool):
+        return value
+    if not isinstance(value, str):
+        return None
+    token = value.strip().lower()
+    if token in _TRUE_STRINGS:
+        return True
+    return False if token in _FALSE_STRINGS else None
+
+
+def supports_priority_service_tier(model: str | None) -> bool:  # True when the normalized name is in PRIORITY_SUPPORTED_MODELS
+    return normalize_model_name(model) in PRIORITY_SUPPORTED_MODELS
+
+
+@dataclass(frozen=True)
+class ServiceTierResolution:  # immutable result returned by resolve_service_tier
+    service_tier: str | None  # tier to use, or None when no tier applies or the tier was rejected
+    error_message: str | None = None  # set when a request-level "priority" choice hits an unsupported model
+    warning_message: str | None = None  # set when the server-default "priority" hits an unsupported model
+    used_server_default: bool = False  # True when the tier came from server_fast_mode
+
+
+def resolve_service_tier(
+    model: str | None,
+    *,
+    request_fast_mode: Any = None,
+    request_service_tier: Any = None,
+    server_fast_mode: bool = False,
+) -> ServiceTierResolution:
+    """Choose the service tier for one request.
+
+    Precedence: a ``request_fast_mode`` value that parses as a boolean, then a
+    non-blank ``request_service_tier`` string, then the ``server_fast_mode``
+    default. When "priority" is selected for a model that does not support it,
+    no tier is returned and the result carries an error (request-level choice)
+    or a warning (server default).
+    """
+    fast_flag = parse_optional_bool(request_fast_mode)
+    from_request = True
+    from_server = False
+    if fast_flag is not None:
+        chosen = "priority" if fast_flag else None
+    elif isinstance(request_service_tier, str) and request_service_tier.strip():
+        chosen = request_service_tier.strip().lower()
+    else:
+        from_request = False
+        from_server = bool(server_fast_mode)
+        chosen = "priority" if server_fast_mode else None
+
+    if chosen != "priority" or supports_priority_service_tier(model):
+        return ServiceTierResolution(service_tier=chosen, used_server_default=from_server)
+
+    reason = (
+        f"Fast mode is not supported for model '{normalize_model_name(model)}'. "
+        "Use a supported GPT-5 priority-processing model or disable fast mode for this request."
+    )
+    if from_request:
+        return ServiceTierResolution(
+            service_tier=None,
+            error_message=reason,
+            used_server_default=from_server,
+        )
+    return ServiceTierResolution(
+        service_tier=None,
+        warning_message=reason,
+        used_server_default=from_server,
+    )
diff --git a/build/lib/chatmock/http.py b/build/lib/chatmock/http.py
new file mode 100644
index 0000000000000000000000000000000000000000..567093a4a409bfc27fb85311e533879a0e5e783a
--- /dev/null
+++ b/build/lib/chatmock/http.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from flask import Response, jsonify, request
+
+
+def build_cors_headers() -> dict:
+    """Build CORS headers that echo the current request's Origin and requested headers."""
+    incoming = request.headers
+    return {
+        "Access-Control-Allow-Origin": incoming.get("Origin", "*"),
+        "Access-Control-Allow-Methods": "POST, GET, OPTIONS",
+        "Access-Control-Allow-Headers": incoming.get("Access-Control-Request-Headers")
+        or "Authorization, Content-Type, Accept",
+        "Access-Control-Max-Age": "86400",
+    }
+
+
+def json_error(message: str, status: int = 400) -> Response:
+    """Return a JSON ``{"error": {"message": ...}}`` response with ``status`` and CORS headers."""
+    out = Response(response=jsonify({"error": {"message": message}}).response, status=status, mimetype="application/json")
+    for header, value in build_cors_headers().items():
+        out.headers.setdefault(header, value)
+    return out
+
diff --git a/build/lib/chatmock/limits.py b/build/lib/chatmock/limits.py
new file mode 100644
index 0000000000000000000000000000000000000000..862076c5ec4fafa65e1f088db44ce7bf03d63fde
--- /dev/null
+++ b/build/lib/chatmock/limits.py
@@ -0,0 +1,200 @@
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from typing import Any, Mapping, Optional
+
+from .utils import get_home_dir
+
+_PRIMARY_USED = "x-codex-primary-used-percent"  # header names looked up by parse_rate_limit_headers
+_PRIMARY_WINDOW = "x-codex-primary-window-minutes"
+_PRIMARY_RESET = "x-codex-primary-reset-after-seconds"
+_SECONDARY_USED = "x-codex-secondary-used-percent"
+_SECONDARY_WINDOW = "x-codex-secondary-window-minutes"
+_SECONDARY_RESET = "x-codex-secondary-reset-after-seconds"
+
+_LIMITS_FILENAME = "usage_limits.json"  # joined onto get_home_dir() by _limits_path()
+
+
+@dataclass
+class RateLimitWindow:  # one limit window, parsed from headers or from the stored JSON
+    used_percent: float  # from the "...-used-percent" header; required for a window to exist
+    window_minutes: Optional[int]  # from the "...-window-minutes" header; None if absent or invalid
+    resets_in_seconds: Optional[int]  # from the "...-reset-after-seconds" header; None if absent or invalid
+
+
+@dataclass
+class RateLimitSnapshot:  # the primary/secondary windows from one response; either may be missing
+    primary: Optional[RateLimitWindow]  # built from the x-codex-primary-* headers
+    secondary: Optional[RateLimitWindow]  # built from the x-codex-secondary-* headers
+
+
+@dataclass
+class StoredRateLimitSnapshot:  # a snapshot as read back by load_rate_limit_snapshot
+    captured_at: datetime  # timezone-aware time the snapshot was stored
+    snapshot: RateLimitSnapshot  # has at least one non-None window when loaded from disk
+
+
+def _parse_float(value: Any) -> Optional[float]:
+    """Convert ``value`` to a finite float; return None for anything else.
+
+    NaN and +/-inf are rejected for numeric inputs as well as for strings.
+    """
+    try:
+        if value is None:
+            return None
+        parsed = float(value if isinstance(value, (int, float)) else str(value).strip())
+        # Reject NaN (unequal to itself) and both infinities.
+        if parsed != parsed or parsed in (float("inf"), float("-inf")):
+            return None
+        return parsed
+    except Exception:
+        return None
+
+
+def _parse_int(value: Any) -> Optional[int]:
+    """Return ``value`` as an int, or None if it cannot be read as one.
+
+    None, bools, blank text and non-integer text (including floats) give None.
+    """
+    if value is None or isinstance(value, bool):
+        return None
+    if isinstance(value, int):
+        return value
+    try:
+        text = str(value).strip()
+        return int(text) if text else None
+    except Exception:
+        return None
+
+
+def _parse_window(headers: Mapping[str, Any], used_key: str, window_key: str, reset_key: str) -> Optional[RateLimitWindow]:
+    """Build one RateLimitWindow from ``headers``; None when the used-percent value is absent or invalid."""
+    used = _parse_float(headers.get(used_key))
+    if used is None:
+        return None
+    minutes, seconds = _parse_int(headers.get(window_key)), _parse_int(headers.get(reset_key))
+    return RateLimitWindow(used_percent=used, window_minutes=minutes, resets_in_seconds=seconds)
+
+
+def parse_rate_limit_headers(headers: Mapping[str, Any]) -> Optional[RateLimitSnapshot]:
+    """Extract the primary and secondary windows from ``headers``; None if neither exists or parsing fails."""
+    try:
+        first = _parse_window(headers, _PRIMARY_USED, _PRIMARY_WINDOW, _PRIMARY_RESET)
+        second = _parse_window(headers, _SECONDARY_USED, _SECONDARY_WINDOW, _SECONDARY_RESET)
+    except Exception:
+        return None
+    nothing_found = first is None and second is None
+    return None if nothing_found else RateLimitSnapshot(primary=first, secondary=second)
+
+
+def _limits_path() -> str:
+    """Return the path of the usage-limits JSON file inside the directory from get_home_dir()."""
+    return os.path.join(get_home_dir(), _LIMITS_FILENAME)
+
+
+def store_rate_limit_snapshot(snapshot: RateLimitSnapshot, captured_at: Optional[datetime] = None) -> None:
+    """Persist ``snapshot`` as JSON in the home directory (best effort; errors are ignored).
+
+    ``captured_at`` defaults to the current UTC time. Where ``os.fchmod`` exists
+    the file mode is set to 0o600 before the payload is written.
+    """
+    captured = captured_at or datetime.now(timezone.utc)
+    try:
+        home = get_home_dir()
+        os.makedirs(home, exist_ok=True)
+        payload: dict[str, Any] = {
+            "captured_at": captured.isoformat(),
+        }
+        for key, window in (("primary", snapshot.primary), ("secondary", snapshot.secondary)):
+            if window:
+                payload[key] = {
+                    "used_percent": window.used_percent,
+                    "window_minutes": window.window_minutes,
+                    "resets_in_seconds": window.resets_in_seconds,
+                }
+        with open(_limits_path(), "w", encoding="utf-8") as fp:
+            if hasattr(os, "fchmod"):
+                try:
+                    os.fchmod(fp.fileno(), 0o600)
+                except OSError:
+                    pass
+            json.dump(payload, fp, indent=2)
+    except Exception:
+        # Persistence is optional, so any failure is deliberately swallowed.
+        pass
+
+
+def load_rate_limit_snapshot() -> Optional[StoredRateLimitSnapshot]:
+    """Load the persisted snapshot, or return None if it is missing or unusable.
+
+    Unusable: unreadable or invalid JSON, non-object top level, bad ``captured_at``, or no windows.
+    """
+    try:
+        with open(_limits_path(), "r", encoding="utf-8") as fp:
+            raw = json.load(fp)
+    except Exception:
+        return None
+    # Valid JSON may still be a list or scalar; .get() below needs a mapping.
+    if not isinstance(raw, dict):
+        return None
+    captured_at = _parse_datetime(raw.get("captured_at"))
+    if captured_at is None:
+        return None
+    primary = _dict_to_window(raw.get("primary"))
+    secondary = _dict_to_window(raw.get("secondary"))
+    if primary is None and secondary is None:
+        return None
+    return StoredRateLimitSnapshot(captured_at=captured_at, snapshot=RateLimitSnapshot(primary=primary, secondary=secondary))
+
+
+def _parse_datetime(value: Any) -> Optional[datetime]:
+    """Parse an ISO-8601 string into an aware datetime, or return None.
+
+    A trailing ``Z`` means UTC, and naive values are also taken as UTC.
+    """
+    text = value.strip() if isinstance(value, str) else ""
+    if not text:
+        return None
+    if text.endswith("Z"):
+        text = f"{text[:-1]}+00:00"
+    try:
+        parsed = datetime.fromisoformat(text)
+    except ValueError:
+        return None
+    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
+
+
+def _dict_to_window(value: Any) -> Optional[RateLimitWindow]:
+    """Rebuild a RateLimitWindow from its stored dict; None for a non-dict or an unparseable ``used_percent``."""
+    used = _parse_float(value.get("used_percent")) if isinstance(value, dict) else None
+    if used is None:
+        return None
+    # The two remaining fields are optional and may each come back as None.
+    minutes = _parse_int(value.get("window_minutes"))
+    seconds = _parse_int(value.get("resets_in_seconds"))
+    return RateLimitWindow(used_percent=used, window_minutes=minutes, resets_in_seconds=seconds)
+
+
+def record_rate_limits_from_response(response: Any) -> None:
+    """Persist any rate-limit headers found on ``response``; do nothing when there are none."""
+    # getattr also covers response=None, which has no ``headers`` attribute.
+    headers = getattr(response, "headers", None)
+    if headers is None:
+        return
+    # parse_rate_limit_headers returns None when no window header is present.
+    snapshot = parse_rate_limit_headers(headers)
+    if snapshot is not None:
+        store_rate_limit_snapshot(snapshot)
+
+
+def compute_reset_at(captured_at: datetime, window: RateLimitWindow) -> Optional[datetime]:
+    """Return ``captured_at`` plus the window's reset delay, or None when unknown or not computable."""
+    delay = window.resets_in_seconds
+    try:
+        return None if delay is None else captured_at + timedelta(seconds=int(delay))
+    except Exception:
+        return None
+
diff --git a/build/lib/chatmock/model_registry.py b/build/lib/chatmock/model_registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bddbeb0bcfc076b803912e15cd7e5306adbf0e4
--- /dev/null
+++ b/build/lib/chatmock/model_registry.py
@@ -0,0 +1,198 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable
+
+
+# Every reasoning-effort name the registry knows about. The tuple order is
+# also the order in which `_strip_model_name` tries effort suffixes.
+ALL_REASONING_EFFORTS = (
+    "none",
+    "minimal",
+    "low",
+    "medium",
+    "high",
+    "xhigh",
+)
+# Effort set used for models whose spec does not narrow it, and for model
+# names that have no registry entry at all.
+DEFAULT_REASONING_EFFORTS = frozenset(ALL_REASONING_EFFORTS)
+
+
+@dataclass(frozen=True)
+class ModelSpec:
+    """Immutable description of one model exposed by the registry."""
+
+    # Id advertised to clients by `list_public_models`.
+    public_id: str
+    # Id returned by `normalize_model_name` for this model.
+    upstream_id: str
+    # Extra client-facing names that resolve to `upstream_id`.
+    aliases: tuple[str, ...]
+    # Reasoning efforts returned by `allowed_efforts_for_model`.
+    allowed_efforts: frozenset[str]
+    # Efforts advertised as "<public_id>-<effort>" variants when reasoning
+    # models are exposed; an empty tuple advertises no variants.
+    variant_efforts: tuple[str, ...]
+    # Value reported by `uses_codex_instructions` for this model.
+    uses_codex_instructions: bool = False
+
+
+# Registry of known models. Order is significant: `list_public_models` emits
+# ids in this order, and when the alias table is built a later entry overwrites
+# an earlier one if two specs claim the same name.
+_MODEL_SPECS = (
+    ModelSpec(
+        public_id="gpt-5",
+        upstream_id="gpt-5",
+        aliases=("gpt5", "gpt-5-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low", "minimal"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.1",
+        upstream_id="gpt-5.1",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.2",
+        upstream_id="gpt-5.2",
+        aliases=("gpt5.2", "gpt-5.2-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.4",
+        upstream_id="gpt-5.4",
+        aliases=("gpt5.4", "gpt-5.4-latest"),
+        allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low", "none"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.4-mini",
+        upstream_id="gpt-5.4-mini",
+        aliases=("gpt5.4-mini", "gpt-5.4-mini-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+    ),
+    # Codex family: every entry below sets uses_codex_instructions=True.
+    ModelSpec(
+        public_id="gpt-5.3-codex",
+        upstream_id="gpt-5.3-codex",
+        aliases=("gpt5.3-codex", "gpt-5.3-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.3-codex-spark",
+        upstream_id="gpt-5.3-codex-spark",
+        aliases=("gpt5.3-codex-spark", "gpt-5.3-codex-spark-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5-codex",
+        upstream_id="gpt-5-codex",
+        aliases=("gpt5-codex", "gpt-5-codex-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.2-codex",
+        upstream_id="gpt-5.2-codex",
+        aliases=("gpt5.2-codex", "gpt-5.2-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex",
+        upstream_id="gpt-5.1-codex",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-max",
+        upstream_id="gpt-5.1-codex-max",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-mini",
+        upstream_id="gpt-5.1-codex-mini",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        # Empty: no "<id>-<effort>" variants are advertised for this model.
+        variant_efforts=(),
+        uses_codex_instructions=True,
+    ),
+    # The only entry whose public id differs from its upstream id.
+    ModelSpec(
+        public_id="codex-mini",
+        upstream_id="codex-mini-latest",
+        aliases=("codex", "codex-mini-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=(),
+        uses_codex_instructions=True,
+    ),
+)
+
+# upstream id -> spec, used once a client-supplied name has been resolved.
+_SPECS_BY_UPSTREAM = dict((spec.upstream_id, spec) for spec in _MODEL_SPECS)
+# Every accepted client-facing name (public id or alias) -> upstream id.
+# Later specs overwrite earlier ones when the same name appears twice.
+_ALIASES = {}
+for _spec in _MODEL_SPECS:
+    for _alias in (_spec.public_id, *_spec.aliases):
+        _ALIASES[_alias] = _spec.upstream_id
+
+
+def _strip_model_name(model: str | None) -> tuple[str, str | None]:
+    """Split a client-supplied model name into ``(base, effort)``.
+
+    The name is trimmed and lower-cased first. An effort is recognised either
+    after the last ``:`` or as a trailing ``-<effort>`` / ``_<effort>`` suffix.
+    Non-string or blank input yields ``("", None)``; a name without a
+    recognised effort yields ``(name, None)``.
+    """
+    value = model.strip().lower() if isinstance(model, str) else ""
+    if not value:
+        return "", None
+
+    # "base:effort" form takes precedence over the suffix forms.
+    head, colon, tail = value.rpartition(":")
+    if colon and tail in DEFAULT_REASONING_EFFORTS:
+        return head, tail
+
+    # All "-" suffixes are tried before any "_" suffix.
+    candidates = (
+        (f"{separator}{effort}", effort)
+        for separator in ("-", "_")
+        for effort in ALL_REASONING_EFFORTS
+    )
+    for suffix, effort in candidates:
+        if value.endswith(suffix):
+            return value[: -len(suffix)], effort
+    return value, None
+
+
+def model_spec_for_name(model: str | None) -> ModelSpec | None:
+    """Look up the registry entry for ``model``.
+
+    Any effort suffix is ignored. Returns ``None`` when the base name is not a
+    known public id or alias.
+    """
+    base = _strip_model_name(model)[0]
+    upstream_id = _ALIASES.get(base)
+    return _SPECS_BY_UPSTREAM.get(upstream_id) if upstream_id else None
+
+
+def normalize_model_name(model: str | None, debug_model: str | None = None) -> str:
+    """Resolve a client-supplied model name to the id sent upstream.
+
+    A non-blank ``debug_model`` wins unconditionally (trimmed, case kept).
+    Otherwise a registered name maps to its upstream id, an unregistered name
+    is returned with its effort suffix stripped, and an empty name falls back
+    to ``"gpt-5.4"``.
+    """
+    if isinstance(debug_model, str):
+        forced = debug_model.strip()
+        if forced:
+            return forced
+    spec = model_spec_for_name(model)
+    if spec is None:
+        base = _strip_model_name(model)[0]
+        return base if base else "gpt-5.4"
+    return spec.upstream_id
+
+
+def uses_codex_instructions(model: str | None) -> bool:
+    """Report whether ``model`` should receive the Codex instructions.
+
+    Registered models answer from their spec; unregistered names fall back to
+    a case-insensitive check for the substring ``"codex"``.
+    """
+    spec = model_spec_for_name(model)
+    if spec is None:
+        lowered = (model or "").strip().lower()
+        return "codex" in lowered
+    return spec.uses_codex_instructions
+
+
+def allowed_efforts_for_model(model: str | None) -> frozenset[str]:
+    """Return the reasoning efforts ``model`` accepts.
+
+    Unregistered names get ``DEFAULT_REASONING_EFFORTS``.
+    """
+    spec = model_spec_for_name(model)
+    return DEFAULT_REASONING_EFFORTS if spec is None else spec.allowed_efforts
+
+
+def extract_reasoning_from_model_name(model: str | None) -> dict[str, str] | None:
+    """Return ``{"effort": <effort>}`` when ``model`` carries an effort suffix.
+
+    Returns ``None`` when no effort is encoded in the name.
+    """
+    effort = _strip_model_name(model)[1]
+    return {"effort": effort} if effort else None
+
+
+def list_public_models(expose_reasoning_models: bool = False) -> list[str]:
+    """List the model ids advertised to clients, in registry order.
+
+    With ``expose_reasoning_models`` set, each public id is followed by its
+    ``"<public_id>-<effort>"`` variants in ``variant_efforts`` order.
+    """
+    ids: list[str] = []
+    for spec in _MODEL_SPECS:
+        ids.append(spec.public_id)
+        if not expose_reasoning_models:
+            continue
+        for effort in spec.variant_efforts:
+            ids.append(f"{spec.public_id}-{effort}")
+    return ids
+
+
+def iter_public_models() -> Iterable[ModelSpec]:
+    """Return the registered model specs in registry order."""
+    specs: Iterable[ModelSpec] = _MODEL_SPECS
+    return specs
diff --git a/build/lib/chatmock/models.py b/build/lib/chatmock/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb19ac49773ed42a158d7253646c58ae1b7739d9
--- /dev/null
+++ b/build/lib/chatmock/models.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass
+class TokenData:
+    """Token set obtained from the OAuth token endpoint for one login."""
+
+    # Raw `id_token` string as returned by the token endpoint.
+    id_token: str
+    # Raw `access_token` string as returned by the token endpoint.
+    access_token: str
+    # Raw `refresh_token` string as returned by the token endpoint.
+    refresh_token: str
+    # `chatgpt_account_id` read from the id token's auth claim ("" if absent).
+    account_id: str
+
+
+@dataclass
+class AuthBundle:
+    """Everything persisted to the auth file after a successful login."""
+
+    # API key obtained via token exchange, or None when none was obtained.
+    api_key: Optional[str]
+    # Tokens returned by the authorization-code exchange.
+    token_data: TokenData
+    # Timestamp string recorded when the tokens were obtained.
+    last_refresh: str
+
+
+@dataclass
+class PkceCodes:
+    """PKCE verifier/challenge pair used for one authorization request."""
+
+    # Sent as `code_verifier` when the authorization code is exchanged.
+    code_verifier: str
+    # Sent as `code_challenge` in the authorization URL.
+    code_challenge: str
+
diff --git a/build/lib/chatmock/oauth.py b/build/lib/chatmock/oauth.py
new file mode 100644
index 0000000000000000000000000000000000000000..2659498abf4dc655bbca656b62841319791b1ceb
--- /dev/null
+++ b/build/lib/chatmock/oauth.py
@@ -0,0 +1,340 @@
+from __future__ import annotations
+
+import datetime
+import ssl
+import http.server
+import json
+import secrets
+import threading
+import time
+import urllib.parse
+import urllib.request
+from typing import Any, Dict, Tuple
+
+import certifi
+
+from .config import OAUTH_ISSUER_DEFAULT
+from .models import AuthBundle, PkceCodes, TokenData
+from .utils import eprint, generate_pkce, parse_jwt_claims, write_auth_file
+
+
+# Port baked into URL_BASE.
+# NOTE(review): presumably this must match the redirect port registered for
+# the OAuth client id — confirm before changing.
+REQUIRED_PORT = 1455
+# Base URL used when building the local "/success" URLs.
+URL_BASE = f"http://localhost:{REQUIRED_PORT}"
+# Issuer every OAuthHTTPServer instance uses for its authorize/token URLs.
+DEFAULT_ISSUER = OAUTH_ISSUER_DEFAULT
+
+
+# Page served to the browser once the login has completed.
+LOGIN_SUCCESS_HTML = """<!DOCTYPE html>
+<html lang=\"en\">
+  <head>
+    <meta charset=\"utf-8\" />
+    <title>Login successful</title>
+  </head>
+  <body>
+    <div style=\"max-width: 640px; margin: 80px auto; font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;\"> 
+      <h1>Login successful</h1>
+      <p>You can now close this window and return to the terminal and run <code>python3 chatmock.py serve</code> to start the server.</p>
+    </div>
+  </body>
+  </html>
+"""
+
+# TLS context backed by certifi's CA bundle; passed to every urlopen() call
+# made against the token endpoint in this module.
+_SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
+
+class OAuthHTTPServer(http.server.HTTPServer):
+    """Local HTTP server that carries the state of one OAuth login attempt.
+
+    Besides serving requests, the instance owns everything the
+    authorization-code + PKCE flow needs: issuer, token endpoint, client id,
+    the redirect URI derived from the bound port, a fresh PKCE pair and a
+    random ``state`` value.
+
+    Attributes:
+        exit_code: Starts at 1; the request handler sets it to 0 once the auth
+            file has been written.
+        home_dir: Value passed by the caller; stored but not read by this class.
+        verbose: Enables request logging in the handler.
+        state: Random value sent in the authorization URL.
+    """
+
+    def __init__(
+        self,
+        server_address: tuple[str, int],
+        request_handler_class: type[http.server.BaseHTTPRequestHandler],
+        *,
+        home_dir: str,
+        client_id: str,
+        verbose: bool = False,
+    ) -> None:
+        """Bind the socket immediately and prepare the per-login secrets."""
+        super().__init__(server_address, request_handler_class, bind_and_activate=True)
+        self.exit_code = 1
+        self.home_dir = home_dir
+        self.verbose = verbose
+        self.issuer = DEFAULT_ISSUER
+        self.token_endpoint = f"{self.issuer}/oauth/token"
+        self.client_id = client_id
+        port = server_address[1]
+        self.redirect_uri = f"http://localhost:{port}/auth/callback"
+        self.pkce = generate_pkce()
+        self.state = secrets.token_hex(32)
+
+    def auth_url(self) -> str:
+        """Return the authorization URL the user's browser must open."""
+        params = {
+            "response_type": "code",
+            "client_id": self.client_id,
+            "redirect_uri": self.redirect_uri,
+            "scope": "openid profile email offline_access",
+            "code_challenge": self.pkce.code_challenge,
+            "code_challenge_method": "S256",
+            "id_token_add_organizations": "true",
+            "codex_cli_simplified_flow": "true",
+            "state": self.state,
+        }
+        return f"{self.issuer}/oauth/authorize?" + urllib.parse.urlencode(params)
+
+    def _post_form(self, fields: Dict[str, Any]) -> Any:
+        """POST form-encoded ``fields`` to the token endpoint and decode the JSON reply.
+
+        Errors raised by ``urllib`` or ``json`` propagate to the caller.
+        """
+        request = urllib.request.Request(
+            self.token_endpoint,
+            data=urllib.parse.urlencode(fields).encode(),
+            method="POST",
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+        )
+        with urllib.request.urlopen(request, context=_SSL_CONTEXT) as resp:
+            return json.loads(resp.read().decode())
+
+    def exchange_code(self, code: str) -> tuple[AuthBundle, str]:
+        """Trade an authorization ``code`` for tokens.
+
+        Returns:
+            ``(bundle, success_url)`` where ``bundle`` holds the tokens (and an
+            API key when one could be obtained) and ``success_url`` points at
+            the local ``/success`` page.
+
+        Raises:
+            Whatever the HTTP request or JSON decoding raises; nothing is
+            caught here.
+        """
+        payload = self._post_form(
+            {
+                "grant_type": "authorization_code",
+                "code": code,
+                "redirect_uri": self.redirect_uri,
+                "client_id": self.client_id,
+                "code_verifier": self.pkce.code_verifier,
+            }
+        )
+
+        id_token = payload.get("id_token", "")
+        access_token = payload.get("access_token", "")
+        refresh_token = payload.get("refresh_token", "")
+
+        id_token_claims = parse_jwt_claims(id_token)
+        access_token_claims = parse_jwt_claims(access_token)
+
+        # The claim may be present but null or otherwise not a mapping; treat
+        # that like a missing claim instead of crashing on `.get`.
+        auth_claims = (id_token_claims or {}).get("https://api.openai.com/auth")
+        if not isinstance(auth_claims, dict):
+            auth_claims = {}
+        chatgpt_account_id = auth_claims.get("chatgpt_account_id", "")
+
+        token_data = TokenData(
+            id_token=id_token,
+            access_token=access_token,
+            refresh_token=refresh_token,
+            account_id=chatgpt_account_id,
+        )
+
+        api_key, success_url = self.maybe_obtain_api_key(
+            id_token_claims or {}, access_token_claims or {}, token_data
+        )
+
+        last_refresh_str = (
+            datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
+        )
+        bundle = AuthBundle(api_key=api_key, token_data=token_data, last_refresh=last_refresh_str)
+        return bundle, success_url or f"{URL_BASE}/success"
+
+    def maybe_obtain_api_key(
+        self,
+        token_claims: Dict[str, Any],
+        access_claims: Dict[str, Any],
+        token_data: TokenData,
+    ) -> tuple[str | None, str | None]:
+        """Exchange the id token for an API key when the claims allow it.
+
+        Returns:
+            ``(api_key, success_url)``. ``api_key`` is ``None`` when the id
+            token carries no ``organization_id`` or no ``project_id``; in that
+            case no request is made.
+
+        The success URL never carries the access token, refresh token or the
+        minted API key: query strings end up in browser history and logs, and
+        the caller already receives those secrets through the returned values.
+        """
+        org_id = token_claims.get("organization_id")
+        project_id = token_claims.get("project_id")
+        query = {
+            "id_token": token_data.id_token,
+            "needs_setup": "false",
+            "org_id": org_id or "",
+            "project_id": project_id or "",
+            "plan_type": access_claims.get("chatgpt_plan_type"),
+            "platform_url": "https://platform.openai.com",
+        }
+        success_url = f"{URL_BASE}/success?{urllib.parse.urlencode(query)}"
+        if not org_id or not project_id:
+            return None, success_url
+
+        today = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
+        exchange_payload = self._post_form(
+            {
+                "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
+                "client_id": self.client_id,
+                "requested_token": "openai-api-key",
+                "subject_token": token_data.id_token,
+                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
+                "name": f"ChatMock [auto-generated] ({today})",
+            }
+        )
+        exchanged_access_token = exchange_payload.get("access_token")
+        return exchanged_access_token, success_url
+
+    def persist_auth(self, bundle: AuthBundle) -> bool:
+        """Write ``bundle`` to the auth file; return what ``write_auth_file`` returns."""
+        auth_json_contents = {
+            "OPENAI_API_KEY": bundle.api_key,
+            "tokens": {
+                "id_token": bundle.token_data.id_token,
+                "access_token": bundle.token_data.access_token,
+                "refresh_token": bundle.token_data.refresh_token,
+                "account_id": bundle.token_data.account_id,
+            },
+            "last_refresh": bundle.last_refresh,
+        }
+        return write_auth_file(auth_json_contents)
+
+
+class OAuthHandler(http.server.BaseHTTPRequestHandler):
+    """Request handler for the local OAuth callback server.
+
+    Routes:
+        ``GET /success``       serves the success page, then stops the server.
+        ``GET /auth/callback`` validates ``code`` and ``state``, exchanges the
+                               code for tokens and persists them.
+        anything else          answers 404 and stops the server.
+    """
+
+    server: "OAuthHTTPServer"
+
+    def do_GET(self) -> None:
+        """Handle the browser redirect that ends the login flow."""
+        parsed = urllib.parse.urlparse(self.path)
+        if parsed.path == "/success":
+            self._send_html(LOGIN_SUCCESS_HTML)
+            try:
+                self.wfile.flush()
+            except Exception as e:
+                eprint(f"Failed to flush response: {e}")
+            self._shutdown_after_delay(2.0)
+            return
+
+        if parsed.path != "/auth/callback":
+            self._fail(404, "Not Found")
+            return
+
+        params = urllib.parse.parse_qs(parsed.query)
+
+        code = params.get("code", [None])[0]
+        if not code:
+            self._fail(400, "Missing auth code")
+            return
+
+        # The authorization URL carried a random `state`; a callback that does
+        # not echo it back was not triggered by this login attempt (CSRF), so
+        # the code must not be exchanged.
+        if not self._state_matches(params.get("state", [None])[0]):
+            self._fail(400, "State mismatch")
+            return
+
+        try:
+            auth_bundle, _success_url = self._exchange_code(code)
+        except Exception as exc:
+            self._fail(500, f"Token exchange failed: {exc}")
+            return
+
+        if self.server.persist_auth(auth_bundle):
+            self.server.exit_code = 0
+            self._send_html(LOGIN_SUCCESS_HTML)
+        else:
+            self.send_error(500, "Unable to persist auth file")
+        # Give the browser time to receive the response before stopping.
+        self._shutdown_after_delay(2.0)
+
+    def do_POST(self) -> None:
+        """Reject every POST with 404 and stop the server."""
+        self._fail(404, "Not Found")
+
+    def log_message(self, fmt: str, *args):
+        """Emit the default access log only when the server is verbose."""
+        if getattr(self.server, "verbose", False):
+            super().log_message(fmt, *args)
+
+    def _state_matches(self, returned_state: Any) -> bool:
+        """Compare the callback's ``state`` with the server's in constant time."""
+        expected = getattr(self.server, "state", None)
+        if not isinstance(returned_state, str) or not isinstance(expected, str) or not expected:
+            return False
+        # Compare bytes: compare_digest rejects non-ASCII str arguments.
+        return secrets.compare_digest(returned_state.encode("utf-8"), expected.encode("utf-8"))
+
+    def _fail(self, status: int, message: str) -> None:
+        """Send an error response and stop the server immediately."""
+        self.send_error(status, message)
+        self._shutdown()
+
+    def _send_redirect(self, url: str) -> None:
+        """Answer with a 302 redirect to ``url``."""
+        self.send_response(302)
+        self.send_header("Location", url)
+        self.end_headers()
+
+    def _send_html(self, body: str) -> None:
+        """Answer 200 with ``body`` as a UTF-8 HTML document."""
+        encoded = body.encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html; charset=utf-8")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+
+    def _shutdown(self) -> None:
+        """Stop the server from a helper thread.
+
+        ``shutdown()`` blocks until the serve loop exits, so it must not run on
+        the thread that is handling the current request.
+        """
+        threading.Thread(target=self.server.shutdown, daemon=True).start()
+
+    def _shutdown_after_delay(self, seconds: float = 2.0) -> None:
+        """Stop the server after ``seconds``, without blocking the response."""
+
+        def _later():
+            try:
+                time.sleep(seconds)
+            finally:
+                self._shutdown()
+
+        threading.Thread(target=_later, daemon=True).start()
+
+    def _exchange_code(self, code: str) -> Tuple[AuthBundle, str]:
+        """Delegate the code exchange to the owning server."""
+        return self.server.exchange_code(code)
+
+    def _maybe_obtain_api_key(
+        self,
+        token_claims: Dict[str, Any],
+        access_claims: Dict[str, Any],
+        token_data: TokenData,
+    ) -> Tuple[str | None, str | None]:
+        """Exchange the id token for an API key when the claims allow it.
+
+        Returns ``(api_key, success_url)``; ``api_key`` is ``None`` and no
+        request is made when ``organization_id`` or ``project_id`` is missing.
+
+        NOTE(review): nothing in this class calls this method; it appears to be
+        kept for external callers — confirm before removing.
+        """
+        org_id = token_claims.get("organization_id")
+        project_id = token_claims.get("project_id")
+        if not org_id or not project_id:
+            query = {
+                "id_token": token_data.id_token,
+                "needs_setup": "false",
+                "org_id": org_id or "",
+                "project_id": project_id or "",
+                "plan_type": access_claims.get("chatgpt_plan_type"),
+                "platform_url": "https://platform.openai.com",
+            }
+            return None, f"{URL_BASE}/success?{urllib.parse.urlencode(query)}"
+
+        today = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
+        exchange_data = urllib.parse.urlencode(
+            {
+                "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
+                "client_id": self.server.client_id,
+                "requested_token": "openai-api-key",
+                "subject_token": token_data.id_token,
+                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
+                "name": f"ChatMock [auto-generated] ({today})",
+            }
+        ).encode()
+
+        with urllib.request.urlopen(
+            urllib.request.Request(
+                self.server.token_endpoint,
+                data=exchange_data,
+                method="POST",
+                headers={"Content-Type": "application/x-www-form-urlencoded"},
+            ),
+            context=_SSL_CONTEXT,
+        ) as resp:
+            exchange_payload = json.loads(resp.read().decode())
+            exchanged_access_token = exchange_payload.get("access_token")
+
+        chatgpt_plan_type = access_claims.get("chatgpt_plan_type")
+        success_url_query = {
+            "id_token": token_data.id_token,
+            "needs_setup": "false",
+            "org_id": org_id,
+            "project_id": project_id,
+            "plan_type": chatgpt_plan_type,
+            "platform_url": "https://platform.openai.com",
+        }
+        success_url = f"{URL_BASE}/success?{urllib.parse.urlencode(success_url_query)}"
+        return exchanged_access_token, success_url
diff --git a/build/lib/chatmock/prompt.md b/build/lib/chatmock/prompt.md
new file mode 100644
index 0000000000000000000000000000000000000000..7783dbd83f8e4f38317c73a973c43782079ea766
--- /dev/null
+++ b/build/lib/chatmock/prompt.md
@@ -0,0 +1 @@
+../prompt.md
\ No newline at end of file
diff --git a/build/lib/chatmock/prompt_gpt5_codex.md b/build/lib/chatmock/prompt_gpt5_codex.md
new file mode 100644
index 0000000000000000000000000000000000000000..04aa304f44abe0603c22f0dc3d4dbf9e9e023179
--- /dev/null
+++ b/build/lib/chatmock/prompt_gpt5_codex.md
@@ -0,0 +1 @@
+../prompt_gpt5_codex.md
\ No newline at end of file
diff --git a/build/lib/chatmock/reasoning.py b/build/lib/chatmock/reasoning.py
new file mode 100644
index 0000000000000000000000000000000000000000..37c276c3f96174d777aa1d2aae053c6376cfb92b
--- /dev/null
+++ b/build/lib/chatmock/reasoning.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from .model_registry import DEFAULT_REASONING_EFFORTS, allowed_efforts_for_model, extract_reasoning_from_model_name
+
+
+def build_reasoning_param(
+    base_effort: str = "medium",
+    base_summary: str = "auto",
+    overrides: Dict[str, Any] | None = None,
+    *,
+    allowed_efforts: frozenset[str] | None = None,
+) -> Dict[str, Any]:
+    """Build the ``reasoning`` object sent upstream.
+
+    Args:
+        base_effort: Server-configured effort used when no valid override exists.
+        base_summary: Server-configured summary mode, same rule.
+        overrides: Optional per-request mapping with ``effort`` and/or
+            ``summary``. Only string values are considered; an override that
+            is not valid for the model is ignored.
+        allowed_efforts: Efforts valid for the target model; falls back to
+            ``DEFAULT_REASONING_EFFORTS`` when empty or ``None``.
+
+    Returns:
+        ``{"effort": ...}`` plus ``"summary"`` unless the summary mode is
+        ``"none"``. An invalid effort becomes ``"medium"`` and an invalid
+        summary becomes ``"auto"``.
+    """
+    effort = (base_effort or "").strip().lower()
+    summary = (base_summary or "").strip().lower()
+
+    valid_efforts = allowed_efforts or DEFAULT_REASONING_EFFORTS
+    valid_summaries = {"auto", "concise", "detailed", "none"}
+
+    if isinstance(overrides, dict):
+        raw_effort = overrides.get("effort")
+        raw_summary = overrides.get("summary")
+        # Only real strings count. Stringifying first would turn a JSON null
+        # into "none", which is itself a valid effort and summary and would
+        # silently switch reasoning or summaries off.
+        o_eff = raw_effort.strip().lower() if isinstance(raw_effort, str) else ""
+        o_sum = raw_summary.strip().lower() if isinstance(raw_summary, str) else ""
+        if o_eff and o_eff in valid_efforts:
+            effort = o_eff
+        if o_sum and o_sum in valid_summaries:
+            summary = o_sum
+    if effort not in valid_efforts:
+        effort = "medium"
+    if summary not in valid_summaries:
+        summary = "auto"
+
+    reasoning: Dict[str, Any] = {"effort": effort}
+    if summary != "none":
+        reasoning["summary"] = summary
+    return reasoning
+
+
+def apply_reasoning_to_message(
+    message: Dict[str, Any],
+    reasoning_summary_text: str,
+    reasoning_full_text: str,
+    compat: str,
+) -> Dict[str, Any]:
+    """Attach reasoning text to ``message`` in the shape ``compat`` asks for.
+
+    ``message`` is mutated in place and also returned.
+
+    Modes (case-insensitive; blank or unusable values mean ``think-tags``):
+        ``legacy`` / ``current``: truthy texts are stored under
+            ``reasoning_summary`` and ``reasoning``.
+        ``o3``: non-blank texts are joined with a blank line and stored as
+            ``reasoning.content[0].text``.
+        anything else: the joined text is wrapped in ``<think>`` tags and
+            prepended to a string ``content``.
+    """
+    try:
+        mode = (compat or "think-tags").strip().lower()
+    except Exception:
+        mode = "think-tags"
+
+    if mode in ("legacy", "current"):
+        if reasoning_summary_text:
+            message["reasoning_summary"] = reasoning_summary_text
+        if reasoning_full_text:
+            message["reasoning"] = reasoning_full_text
+        return message
+
+    # Both remaining modes work on the same joined text.
+    combined = "\n\n".join(
+        part
+        for part in (reasoning_summary_text, reasoning_full_text)
+        if isinstance(part, str) and part.strip()
+    )
+    if not combined:
+        return message
+
+    if mode == "o3":
+        message["reasoning"] = {"content": [{"type": "text", "text": combined}]}
+        return message
+
+    existing = message.get("content") or ""
+    if isinstance(existing, str):
+        message["content"] = f"<think>{combined}</think>" + existing
+    return message
diff --git a/build/lib/chatmock/responses_api.py b/build/lib/chatmock/responses_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..51bda2ac5203dbec8f36cd7ba5d03204085b310b
--- /dev/null
+++ b/build/lib/chatmock/responses_api.py
@@ -0,0 +1,243 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, Iterator, List
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import ServiceTierResolution, resolve_service_tier
+from .model_registry import (
+    allowed_efforts_for_model,
+    extract_reasoning_from_model_name,
+    normalize_model_name,
+    uses_codex_instructions,
+)
+from .reasoning import build_reasoning_param
+from .session import ensure_session_id
+
+
+class ResponsesRequestError(Exception):
+    """Raised when a Responses API request cannot be normalized.
+
+    Attributes:
+        message: Human-readable description; also the ``str()`` of the error.
+        status_code: HTTP status the caller should answer with (default 400).
+        code: Optional machine-readable error code.
+
+    This is deliberately a plain class rather than a frozen dataclass: a frozen
+    instance rejects every Python-level attribute assignment, including the
+    ``exc.__traceback__ = ...`` that ``contextlib.contextmanager`` performs
+    when re-raising, which would replace this error with ``FrozenInstanceError``.
+    Construction, field names, ``repr`` and value equality match the previous
+    dataclass form.
+    """
+
+    def __init__(self, message: str, status_code: int = 400, code: str | None = None) -> None:
+        super().__init__(message)
+        self.message = message
+        self.status_code = status_code
+        self.code = code
+
+    def _fields(self) -> tuple:
+        """Return the values that define equality and hashing."""
+        return (self.message, self.status_code, self.code)
+
+    def __eq__(self, other: object) -> bool:
+        if other.__class__ is self.__class__:
+            return self._fields() == other._fields()  # type: ignore[attr-defined]
+        return NotImplemented
+
+    def __hash__(self) -> int:
+        return hash(self._fields())
+
+    def __repr__(self) -> str:
+        return (
+            f"{self.__class__.__qualname__}(message={self.message!r}, "
+            f"status_code={self.status_code!r}, code={self.code!r})"
+        )
+
+    def __str__(self) -> str:
+        return self.message
+
+
+@dataclass(frozen=True)
+class NormalizedResponsesRequest:
+    """Result of `normalize_responses_payload`."""
+
+    # Copy of the client payload after every normalization step.
+    payload: Dict[str, Any]
+    # The client's `model` value when it was a string, otherwise None.
+    requested_model: str | None
+    # Model id written into `payload["model"]`.
+    normalized_model: str
+    # Session id returned by `ensure_session_id` for this request.
+    session_id: str
+    # Outcome of `resolve_service_tier` for this request.
+    service_tier_resolution: ServiceTierResolution
+
+
+def instructions_for_model(config: Dict[str, Any], model: str) -> str:
+    """Pick the default instructions text for ``model``.
+
+    Codex models get the configured (or built-in) Codex instructions when that
+    text is a non-blank string; every other case returns the configured (or
+    built-in) base instructions.
+    """
+    if uses_codex_instructions(model):
+        codex_text = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
+        if isinstance(codex_text, str) and codex_text.strip():
+            return codex_text
+    return config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
+
+
+def extract_client_session_id(headers: Any) -> str | None:
+    """Return the client-supplied session id, if any.
+
+    ``X-Session-Id`` is preferred over ``session_id``; empty values count as
+    missing. Any error while reading ``headers`` yields ``None``.
+    """
+    try:
+        for header_name in ("X-Session-Id", "session_id"):
+            candidate = headers.get(header_name)
+            if candidate:
+                return candidate
+    except Exception:
+        pass
+    return None
+
+
+def _input_items_for_session(raw_input: Any) -> List[Dict[str, Any]]:
+    """Coerce a Responses ``input`` value into a list of item dicts.
+
+    A list keeps only its dict members, a dict becomes a one-item list, and a
+    non-blank string becomes a single user message holding that text
+    unmodified. Everything else yields an empty list.
+    """
+    if isinstance(raw_input, dict):
+        return [raw_input]
+    if isinstance(raw_input, list):
+        return [entry for entry in raw_input if isinstance(entry, dict)]
+    if not isinstance(raw_input, str) or not raw_input.strip():
+        return []
+    text_part = {"type": "input_text", "text": raw_input}
+    return [{"type": "message", "role": "user", "content": [text_part]}]
+
+
+def canonicalize_responses_input(raw_input: Any) -> Any:
+    """Normalize the ``input`` field of a Responses payload.
+
+    Lists keep only dict members, a dict is wrapped in a list, and a string is
+    converted by ``_input_items_for_session``. Any other value is returned
+    unchanged.
+    """
+    if isinstance(raw_input, str):
+        return _input_items_for_session(raw_input)
+    if isinstance(raw_input, dict):
+        return [raw_input]
+    if isinstance(raw_input, list):
+        return [entry for entry in raw_input if isinstance(entry, dict)]
+    return raw_input
+
+
+def normalize_responses_payload(
+    payload: Dict[str, Any],
+    *,
+    config: Dict[str, Any],
+    client_session_id: str | None = None,
+) -> NormalizedResponsesRequest:
+    """Turn a client Responses payload into the payload sent upstream.
+
+    The input mapping is shallow-copied, so top-level keys of ``payload`` are
+    never modified.
+
+    Args:
+        payload: Request body received from the client.
+        config: Server configuration; the keys read here are ``DEBUG_MODEL``,
+            ``REASONING_EFFORT``, ``REASONING_SUMMARY``, ``DEFAULT_WEB_SEARCH``
+            and ``FAST_MODE`` (plus whatever ``instructions_for_model`` reads).
+        client_session_id: Session id supplied by the client, if any.
+
+    Returns:
+        The normalized payload together with the requested/normalized model,
+        the session id and the service-tier resolution.
+
+    Raises:
+        ResponsesRequestError: When service-tier resolution reports an error.
+    """
+    requested_model = payload.get("model") if isinstance(payload.get("model"), str) else None
+    normalized_model = normalize_model_name(requested_model, config.get("DEBUG_MODEL"))
+
+    normalized = dict(payload)
+    normalized["model"] = normalized_model
+    # Always dropped, whatever the client sent.
+    normalized.pop("max_output_tokens", None)
+
+    if "input" in normalized:
+        normalized["input"] = canonicalize_responses_input(normalized.get("input"))
+
+    # Default `store` to False only when the client did not set it.
+    if "store" not in normalized:
+        normalized["store"] = False
+
+    # Missing or blank instructions are replaced by the per-model default.
+    instructions = normalized.get("instructions")
+    if not isinstance(instructions, str) or not instructions.strip():
+        instructions = instructions_for_model(config, normalized_model)
+        normalized["instructions"] = instructions
+
+    reasoning_effort = config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = config.get("REASONING_SUMMARY", "auto")
+    # An explicit `reasoning` dict wins outright; the effort suffix in the
+    # model name is consulted only when no such dict was sent.
+    reasoning_overrides = (
+        normalized.get("reasoning")
+        if isinstance(normalized.get("reasoning"), dict)
+        else extract_reasoning_from_model_name(requested_model)
+    )
+    normalized["reasoning"] = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(normalized_model),
+    )
+
+    # Keep the client's string `include` entries and make sure
+    # "reasoning.encrypted_content" is always requested.
+    include = normalized.get("include")
+    include_list = [item for item in include if isinstance(item, str)] if isinstance(include, list) else []
+    if "reasoning.encrypted_content" not in include_list:
+        include_list.append("reasoning.encrypted_content")
+    normalized["include"] = include_list
+
+    # Inject the default web-search tool only when the client sent no tools
+    # and did not set tool_choice to "none".
+    tools = normalized.get("tools")
+    if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")):
+        tool_choice = normalized.get("tool_choice")
+        if not (isinstance(tool_choice, str) and tool_choice.strip().lower() == "none"):
+            normalized["tools"] = [{"type": "web_search"}]
+
+    service_tier_resolution = resolve_service_tier(
+        normalized_model,
+        request_fast_mode=normalized.get("fast_mode"),
+        request_service_tier=normalized.get("service_tier"),
+        server_fast_mode=bool(config.get("FAST_MODE")),
+    )
+    if service_tier_resolution.error_message:
+        raise ResponsesRequestError(service_tier_resolution.error_message)
+    if service_tier_resolution.service_tier is None:
+        normalized.pop("service_tier", None)
+    else:
+        normalized["service_tier"] = service_tier_resolution.service_tier
+    # `fast_mode` is consumed by the resolution above and never forwarded.
+    normalized.pop("fast_mode", None)
+
+    input_items = _input_items_for_session(normalized.get("input"))
+    session_id = ensure_session_id(instructions, input_items, client_session_id)
+    # The session id doubles as the prompt cache key unless the client
+    # supplied a non-blank key of its own.
+    prompt_cache_key = normalized.get("prompt_cache_key")
+    if not isinstance(prompt_cache_key, str) or not prompt_cache_key.strip():
+        normalized["prompt_cache_key"] = session_id
+
+    return NormalizedResponsesRequest(
+        payload=normalized,
+        requested_model=requested_model,
+        normalized_model=normalized_model,
+        session_id=session_id,
+        service_tier_resolution=service_tier_resolution,
+    )
+
+
+def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]:
+    """Yield the JSON-object payload of each ``data: `` line from ``upstream``.
+
+    Lines are read via ``upstream.iter_lines``. Blank lines, lines without the
+    ``data: `` prefix, undecodable JSON and non-object JSON are skipped.
+    Iteration ends at the first ``[DONE]`` payload.
+    """
+    prefix = "data: "
+    for raw in upstream.iter_lines(decode_unicode=False):
+        if not raw:
+            continue
+        if isinstance(raw, (bytes, bytearray)):
+            line = raw.decode("utf-8", errors="ignore")
+        else:
+            line = raw
+        if not line.startswith(prefix):
+            continue
+        data = line[len(prefix) :].strip()
+        if data == "[DONE]":
+            return
+        if not data:
+            continue
+        try:
+            evt = json.loads(data)
+        except Exception:
+            continue
+        if isinstance(evt, dict):
+            yield evt
+
+
+def aggregate_response_from_sse(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]:
+    """Consume an SSE stream and reduce it to a final response or an error.
+
+    Each event is first offered to ``on_event`` (exceptions from the callback
+    are swallowed). The most recent ``response`` object seen is kept, and
+    reading stops at ``response.completed`` or ``response.failed``. The
+    upstream is always closed.
+
+    Returns:
+        ``(response, error)``. ``error`` is ``None`` unless a
+        ``response.failed`` event arrived, in which case it wraps the
+        response's ``error`` dict or a generic ``response.failed`` message.
+    """
+    latest: Dict[str, Any] | None = None
+    failure: Dict[str, Any] | None = None
+    notify = callable(on_event)
+    try:
+        for evt in iter_sse_event_payloads(upstream):
+            if notify:
+                try:
+                    on_event(evt)
+                except Exception:
+                    pass
+            response = evt.get("response")
+            has_response = isinstance(response, dict)
+            if has_response:
+                latest = response
+            kind = evt.get("type")
+            if kind == "response.completed":
+                break
+            if kind != "response.failed":
+                continue
+            if has_response and isinstance(response.get("error"), dict):
+                failure = {"error": response.get("error")}
+            else:
+                failure = {"error": {"message": "response.failed"}}
+            break
+    finally:
+        upstream.close()
+    return latest, failure
+
+
+def stream_upstream_bytes(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> Iterable[bytes]:
+    """Relay upstream chunks untouched while optionally observing SSE events.
+
+    Every non-empty chunk from ``upstream.iter_content`` is yielded as-is.
+    When ``on_event`` is callable, the bytes are also reassembled into lines
+    and each ``data: `` line holding a JSON object is passed to it; parse
+    failures and callback exceptions are ignored. The upstream is always
+    closed when iteration ends.
+    """
+    observe = callable(on_event)
+    pending = b""
+    marker = b"data: "
+    try:
+        for chunk in upstream.iter_content(chunk_size=None):
+            if not chunk:
+                continue
+            if observe:
+                pending += chunk if isinstance(chunk, bytes) else str(chunk).encode("utf-8", errors="ignore")
+                # Everything before the last newline is complete; the tail
+                # waits for the next chunk.
+                *complete_lines, pending = pending.split(b"\n")
+                for raw_line in complete_lines:
+                    line = raw_line.rstrip(b"\r")
+                    if not line.startswith(marker):
+                        continue
+                    data = line[len(marker) :].strip()
+                    if not data or data == b"[DONE]":
+                        continue
+                    try:
+                        evt = json.loads(data.decode("utf-8", errors="ignore"))
+                    except Exception:
+                        continue
+                    if not isinstance(evt, dict):
+                        continue
+                    try:
+                        on_event(evt)
+                    except Exception:
+                        pass
+            yield chunk
+    finally:
+        upstream.close()
diff --git a/build/lib/chatmock/routes_ollama.py b/build/lib/chatmock/routes_ollama.py
new file mode 100644
index 0000000000000000000000000000000000000000..5da18d0e4156f77092fcf287c8be6c9208f81096
--- /dev/null
+++ b/build/lib/chatmock/routes_ollama.py
@@ -0,0 +1,585 @@
+from __future__ import annotations
+
+import json
+import datetime
+import time
+from typing import Any, Dict, List
+
+from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
+from .limits import record_rate_limits_from_response
+from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import instructions_for_model
+from .reasoning import (
+    allowed_efforts_for_model,
+    build_reasoning_param,
+    extract_reasoning_from_model_name,
+)
+from .transform import convert_ollama_messages, normalize_ollama_tools
+from .upstream import normalize_model_name, start_upstream_request
+from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_to_responses
+
+
+ollama_bp = Blueprint("ollama", __name__)  # Blueprint that the Ollama-compatible /api/* routes in this module register on.
+
+
+def _log_json(prefix: str, payload: Any) -> None:  # Print prefix, a newline, then payload as indented JSON; never raises.
+    try:
+        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+    except Exception:  # serialising (or printing) the JSON form failed
+        try:
+            print(f"{prefix}\n{payload}")  # fall back to the payload's plain string formatting
+        except Exception:
+            pass  # logging is best-effort: swallow the error instead of failing the caller
+
+
+def _wrap_stream_logging(label: str, iterator, enabled: bool):
+    """Return ``iterator`` untouched when disabled; otherwise a generator that prints each chunk under ``label`` before yielding it."""
+    if not enabled:
+        return iterator
+
+    def _logged():
+        for piece in iterator:
+            try:
+                if isinstance(piece, (bytes, bytearray)):
+                    shown = piece.decode("utf-8", errors="replace")
+                else:
+                    shown = str(piece)
+                print(f"{label}\n{shown}")
+            except Exception:  # logging is best-effort and must not break the stream
+                pass
+            yield piece
+
+    return _logged()
+
+
+@ollama_bp.route("/api/version", methods=["GET"])
+def ollama_version() -> Response:
+    """Report the configured ``OLLAMA_VERSION``; non-string or blank values fall back to ``0.12.10``."""
+    if bool(current_app.config.get("VERBOSE")):
+        print("IN GET /api/version")
+    configured = current_app.config.get("OLLAMA_VERSION", "0.12.10")
+    usable = isinstance(configured, str) and bool(configured.strip())
+    payload = {"version": configured if usable else "0.12.10"}
+    resp = make_response(jsonify(payload), 200)
+    for header, value in build_cors_headers().items():
+        resp.headers.setdefault(header, value)
+    if bool(current_app.config.get("VERBOSE")):
+        _log_json("OUT GET /api/version", payload)
+    return resp
+
+
+def _instructions_for_model(model: str) -> str:  # Delegate to instructions_for_model() with the current app's config.
+    return instructions_for_model(current_app.config, model)
+
+
+_OLLAMA_FAKE_EVAL = {  # Constant placeholder statistics merged into every final /api/chat reply; not measured values.
+    "total_duration": 8497226791,  # NOTE(review): duration fields look like nanoseconds, as in Ollama's API - confirm
+    "load_duration": 1747193958,
+    "prompt_eval_count": 24,
+    "prompt_eval_duration": 269219750,
+    "eval_count": 247,
+    "eval_duration": 6413802458,
+}
+
+
+@ollama_bp.route("/api/tags", methods=["GET"])
+def ollama_tags() -> Response:
+    """List every public model id as an Ollama tag entry carrying the same fixed placeholder metadata."""
+    if bool(current_app.config.get("VERBOSE")):
+        print("IN GET /api/tags")
+    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
+    models = [
+        {
+            "name": model_id,
+            "model": model_id,
+            "modified_at": "2023-10-01T00:00:00Z",
+            "size": 815319791,
+            "digest": "8648f39daa8fbf5b18c7b4e6a8fb4990c692751d49917417b8842ca5758e7ffc",
+            "details": {
+                "parent_model": "",
+                "format": "gguf",
+                "family": "llama",
+                "families": ["llama"],
+                "parameter_size": "8.0B",
+                "quantization_level": "Q4_0",
+            },
+        }
+        for model_id in list_public_models(expose_reasoning_models=expose_variants)
+    ]
+    payload = {"models": models}
+    resp = make_response(jsonify(payload), 200)
+    # Add the CORS headers without overriding any header that is already set.
+    for header, value in build_cors_headers().items():
+        resp.headers.setdefault(header, value)
+    if bool(current_app.config.get("VERBOSE")):
+        _log_json("OUT GET /api/tags", payload)
+    return resp
+
+
+@ollama_bp.route("/api/show", methods=["POST"])
+def ollama_show() -> Response:  # Answer Ollama's "show" call with one fixed description; 400 when "model" is missing or blank.
+    verbose = bool(current_app.config.get("VERBOSE"))
+    raw_body = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /api/show\n" + raw_body)
+        except Exception:
+            pass  # request logging is best-effort
+    try:
+        payload = json.loads(raw_body) if raw_body else (request.get_json(silent=True) or {})
+    except Exception:
+        payload = request.get_json(silent=True) or {}  # body was not parseable as raw JSON; let Flask try
+    model = payload.get("model") if isinstance(payload, dict) else None  # non-object JSON (list, string, ...) is rejected below, not a crash
+    if not isinstance(model, str) or not model.strip():
+        err = {"error": "Model not found"}
+        if verbose:
+            _log_json("OUT POST /api/show", err)
+        return jsonify(err), 400
+    v1_show_response = {  # the same literal is returned for every model name
+        "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /models/blobs/sha256:placeholder\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 100000\nPARAMETER stop \"</s>\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
+        "parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"",
+        "template": "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>",
+        "details": {
+            "parent_model": "",
+            "format": "gguf",
+            "family": "llama",
+            "families": ["llama"],
+            "parameter_size": "8.0B",
+            "quantization_level": "Q4_0",
+        },
+        "model_info": {
+            "general.architecture": "llama",
+            "general.file_type": 2,
+            "llama.context_length": 2000000,
+        },
+        "capabilities": ["completion", "vision", "tools", "thinking"],
+    }
+    if verbose:
+        _log_json("OUT POST /api/show", v1_show_response)
+    resp = make_response(jsonify(v1_show_response), 200)
+    for k, v in build_cors_headers().items():  # setdefault: never override a header that is already present
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@ollama_bp.route("/api/chat", methods=["POST"])
+def ollama_chat() -> Response:
+    """Serve Ollama ``/api/chat``: forward the chat upstream and reply as an NDJSON stream or one JSON object."""
+    verbose = bool(current_app.config.get("VERBOSE"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+
+    try:
+        raw = request.get_data(cache=True, as_text=True) or ""
+        if verbose:
+            print("IN POST /api/chat\n" + (raw if isinstance(raw, str) else ""))
+        payload = json.loads(raw) if raw else {}
+        if not isinstance(payload, dict):
+            # Valid JSON that is not an object (list, string, null) would otherwise crash on payload.get() below.
+            raise ValueError("JSON body must be an object")
+    except Exception:
+        err = {"error": "Invalid JSON body"}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+
+    model = payload.get("model")
+    messages = convert_ollama_messages(
+        payload.get("messages"), payload.get("images") if isinstance(payload.get("images"), list) else None
+    )
+    if isinstance(messages, list):
+        # The first system message is removed and re-sent as a leading user message.
+        sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
+        if isinstance(sys_idx, int):
+            sys_msg = messages.pop(sys_idx)
+            content = sys_msg.get("content") if isinstance(sys_msg, dict) else ""
+            messages.insert(0, {"role": "user", "content": content})
+    stream_req = payload.get("stream")
+    stream_req = True if stream_req is None else bool(stream_req)  # streaming is the default when the field is absent or null
+    tools_req = payload.get("tools") if isinstance(payload.get("tools"), list) else []
+    tools_responses = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
+    tool_choice = payload.get("tool_choice", "auto")
+    parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+
+    # Passthrough Responses API tools (web_search) via ChatMock extension fields
+    extra_tools: List[Dict[str, Any]] = []
+    had_responses_tools = False
+    rtc = payload.get("responses_tool_choice")
+    rt_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+    for _t in rt_payload:
+        if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+            continue
+        if _t.get("type") not in ("web_search", "web_search_preview"):
+            err = {"error": "Only web_search/web_search_preview are supported in responses_tools"}
+            if verbose:
+                _log_json("OUT POST /api/chat", err)
+            return jsonify(err), 400
+        extra_tools.append(_t)
+    if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+        if not (isinstance(rtc, str) and rtc == "none"):
+            extra_tools = [{"type": "web_search"}]
+    if extra_tools:
+        MAX_TOOLS_BYTES = 32768
+        try:
+            size = len(json.dumps(extra_tools))
+        except Exception:
+            size = 0
+        if size > MAX_TOOLS_BYTES:
+            err = {"error": "responses_tools too large"}
+            if verbose:
+                _log_json("OUT POST /api/chat", err)
+            return jsonify(err), 400
+        had_responses_tools = True
+        tools_responses = (tools_responses or []) + extra_tools
+
+    if isinstance(rtc, str) and rtc in ("auto", "none"):
+        tool_choice = rtc
+
+    if not isinstance(model, str) or not isinstance(messages, list) or not messages:
+        err = {"error": "Invalid request format"}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+
+    input_items = convert_chat_messages_to_responses_input(messages)
+
+    model_reasoning = extract_reasoning_from_model_name(model)
+    normalized_model = normalize_model_name(model, current_app.config.get("DEBUG_MODEL"))
+    service_tier_resolution = resolve_service_tier(
+        normalized_model,
+        request_fast_mode=payload.get("fast_mode"),
+        request_service_tier=payload.get("service_tier"),
+        server_fast_mode=bool(current_app.config.get("FAST_MODE")),
+    )
+    if service_tier_resolution.warning_message and verbose:
+        print(f"[FastMode] {service_tier_resolution.warning_message}")
+    if service_tier_resolution.error_message:
+        err = {"error": service_tier_resolution.error_message}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+    upstream, error_resp = start_upstream_request(
+        normalized_model,
+        input_items,
+        instructions=_instructions_for_model(normalized_model),
+        tools=tools_responses,
+        tool_choice=tool_choice,
+        parallel_tool_calls=parallel_tool_calls,
+        reasoning_param=build_reasoning_param(
+            reasoning_effort,
+            reasoning_summary,
+            model_reasoning,
+            allowed_efforts=allowed_efforts_for_model(model),
+        ),
+        service_tier=service_tier_resolution.service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /api/chat", parsed)
+            except Exception:
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    if upstream.status_code >= 400:
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        # Extract the message defensively: the error body, or its "error" field, may not be a JSON object.
+        upstream_error = err_body.get("error") if isinstance(err_body, dict) else None
+        upstream_msg = upstream_error.get("message", "Upstream error") if isinstance(upstream_error, dict) else "Upstream error"
+        if had_responses_tools:
+            if verbose:
+                print("[Passthrough] Upstream rejected tools; retrying without extras (args redacted)")
+            base_tools_only = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
+            safe_choice = payload.get("tool_choice", "auto")
+            upstream2, err2 = start_upstream_request(
+                normalized_model,
+                input_items,
+                instructions=_instructions_for_model(normalized_model),  # same instructions as the first attempt
+                tools=base_tools_only,
+                tool_choice=safe_choice,
+                parallel_tool_calls=parallel_tool_calls,
+                reasoning_param=build_reasoning_param(
+                    reasoning_effort,
+                    reasoning_summary,
+                    model_reasoning,
+                    allowed_efforts=allowed_efforts_for_model(model),
+                ),
+                service_tier=service_tier_resolution.service_tier,
+            )
+            if upstream2 is not None:  # the retry may have produced no response object at all
+                record_rate_limits_from_response(upstream2)
+            if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+                upstream = upstream2
+            else:
+                err = {"error": {"message": upstream_msg, "code": "RESPONSES_TOOLS_REJECTED"}}
+                if verbose:
+                    _log_json("OUT POST /api/chat", err)
+                return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
+        else:
+            if verbose:
+                print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
+            err = {"error": upstream_msg}
+            if verbose:
+                _log_json("OUT POST /api/chat", err)
+            return jsonify(err), upstream.status_code
+
+    created_at = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+    model_out = model if isinstance(model, str) and model.strip() else normalized_model
+
+    if stream_req:
+        def _gen():
+            # NOTE: only reasoning and text deltas are relayed here; function_call items are collected by the non-streaming path only.
+            compat = (current_app.config.get("REASONING_COMPAT", "think-tags") or "think-tags").strip().lower()
+            think_open = False
+            think_closed = False
+            saw_any_summary = False
+            pending_summary_paragraph = False
+            client_gone = False
+            try:
+                for raw_line in upstream.iter_lines(decode_unicode=False):
+                    if not raw_line:
+                        continue
+                    line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+                    if not line.startswith("data: "):
+                        continue
+                    data = line[len("data: "):].strip()
+                    if not data:
+                        continue
+                    if data == "[DONE]":
+                        break
+                    try:
+                        evt = json.loads(data)
+                    except Exception:
+                        continue
+                    kind = evt.get("type")
+                    if kind == "response.reasoning_summary_part.added":
+                        if compat in ("think-tags", "o3"):
+                            if saw_any_summary:
+                                pending_summary_paragraph = True
+                            else:
+                                saw_any_summary = True
+                    elif kind in ("response.reasoning_summary_text.delta", "response.reasoning_text.delta"):
+                        delta_txt = evt.get("delta") or ""
+                        if compat == "o3":
+                            if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "\n"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                pending_summary_paragraph = False
+                            if delta_txt:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": delta_txt},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                        elif compat == "think-tags":
+                            if not think_open and not think_closed:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "<think>"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                think_open = True
+                            if think_open and not think_closed:
+                                if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": "\n"},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                                    pending_summary_paragraph = False
+                                if delta_txt:
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": delta_txt},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                    elif kind == "response.output_text.delta":
+                        delta = evt.get("delta") or ""
+                        if compat == "think-tags" and think_open and not think_closed:
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": "</think>"},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                            think_open = False
+                            think_closed = True
+                        if delta:
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": delta},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                    elif kind == "response.completed":
+                        break
+            except GeneratorExit:
+                # The consumer closed the stream (e.g. client disconnect); yielding again would raise RuntimeError.
+                client_gone = True
+                raise
+            finally:
+                upstream.close()
+                if not client_gone:
+                    if compat == "think-tags" and think_open and not think_closed:
+                        yield (
+                            json.dumps(
+                                {
+                                    "model": model_out,
+                                    "created_at": created_at,
+                                    "message": {"role": "assistant", "content": "</think>"},
+                                    "done": False,
+                                }
+                            )
+                            + "\n"
+                        )
+                    done_obj = {
+                        "model": model_out,
+                        "created_at": created_at,
+                        "message": {"role": "assistant", "content": ""},
+                        "done": True,
+                    }
+                    done_obj.update(_OLLAMA_FAKE_EVAL)
+                    yield json.dumps(done_obj) + "\n"
+        if verbose:
+            print("OUT POST /api/chat (streaming response)")
+        stream_iter = stream_with_context(_gen())
+        stream_iter = _wrap_stream_logging("STREAM OUT /api/chat", stream_iter, verbose)
+        resp = current_app.response_class(
+            stream_iter,
+            status=200,
+            mimetype="application/x-ndjson",
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    # Non-streaming: drain the upstream event feed and aggregate text, reasoning and function calls into one reply.
+    full_text = ""
+    reasoning_summary_text = ""
+    reasoning_full_text = ""
+    tool_calls: List[Dict[str, Any]] = []
+    try:
+        for raw in upstream.iter_lines(decode_unicode=False):
+            if not raw:
+                continue
+            line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            kind = evt.get("type")
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_summary_text.delta":
+                reasoning_summary_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_text.delta":
+                reasoning_full_text += evt.get("delta") or ""
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and item.get("type") == "function_call":
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ""
+                    args = item.get("arguments") or ""
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        tool_calls.append(
+                            {
+                                "id": call_id,
+                                "type": "function",
+                                "function": {"name": name, "arguments": args},
+                            }
+                        )
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+
+    # In think-tags mode the collected reasoning is prepended to the answer inside <think>...</think>.
+    if (current_app.config.get("REASONING_COMPAT", "think-tags") or "think-tags").strip().lower() == "think-tags":
+        rtxt_parts = []
+        if isinstance(reasoning_summary_text, str) and reasoning_summary_text.strip():
+            rtxt_parts.append(reasoning_summary_text)
+        if isinstance(reasoning_full_text, str) and reasoning_full_text.strip():
+            rtxt_parts.append(reasoning_full_text)
+        rtxt = "\n\n".join([p for p in rtxt_parts if p])
+        if rtxt:
+            full_text = f"<think>{rtxt}</think>" + (full_text or "")
+
+    out_json = {
+        "model": normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
+        "created_at": created_at,
+        "message": {"role": "assistant", "content": full_text, **({"tool_calls": tool_calls} if tool_calls else {})},
+        "done": True,
+        "done_reason": "stop",
+    }
+    out_json.update(_OLLAMA_FAKE_EVAL)
+    if verbose:
+        _log_json("OUT POST /api/chat", out_json)
+    resp = make_response(jsonify(out_json), 200)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
diff --git a/build/lib/chatmock/routes_openai.py b/build/lib/chatmock/routes_openai.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb378425ecebf4e71a51d21bdbfb4b0527f3fc1c
--- /dev/null
+++ b/build/lib/chatmock/routes_openai.py
@@ -0,0 +1,738 @@
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Dict, List
+
+from flask import Blueprint, Response, current_app, jsonify, make_response, request
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
+from .limits import record_rate_limits_from_response
+from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import (
+    ResponsesRequestError,
+    aggregate_response_from_sse,
+    extract_client_session_id,
+    instructions_for_model,
+    normalize_responses_payload,
+    stream_upstream_bytes,
+)
+from .reasoning import (
+    allowed_efforts_for_model,
+    apply_reasoning_to_message,
+    build_reasoning_param,
+    extract_reasoning_from_model_name,
+)
+from .session import (
+    clear_responses_reuse_state,
+    note_responses_final_response,
+    note_responses_stream_event,
+    prepare_responses_request_for_session,
+)
+from .upstream import normalize_model_name, start_upstream_raw_request, start_upstream_request
+from .utils import (
+    convert_chat_messages_to_responses_input,
+    convert_tools_chat_to_responses,
+    sse_translate_chat,
+    sse_translate_text,
+)
+
+
+openai_bp = Blueprint("openai", __name__)  # Blueprint that the OpenAI-compatible routes in this module (e.g. /v1/chat/completions) register on.
+
+
+def _log_json(prefix: str, payload: Any) -> None:  # Print prefix, a newline, then payload as indented JSON; never raises.
+    try:
+        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+    except Exception:  # serialising (or printing) the JSON form failed
+        try:
+            print(f"{prefix}\n{payload}")  # fall back to the payload's plain string formatting
+        except Exception:
+            pass  # logging is best-effort: swallow the error instead of failing the caller
+
+
+def _wrap_stream_logging(label: str, iterator, enabled: bool):
+    """Return ``iterator`` untouched when disabled; otherwise a generator that prints each chunk under ``label`` before yielding it."""
+    if not enabled:
+        return iterator
+
+    def _logged():
+        for piece in iterator:
+            try:
+                if isinstance(piece, (bytes, bytearray)):
+                    shown = piece.decode("utf-8", errors="replace")
+                else:
+                    shown = str(piece)
+                print(f"{label}\n{shown}")
+            except Exception:  # logging is best-effort and must not break the stream
+                pass
+            yield piece
+
+    return _logged()
+
+
+def _instructions_for_model(model: str) -> str:  # Delegate to instructions_for_model() with the current app's config.
+    return instructions_for_model(current_app.config, model)
+
+
+def _service_tier_from_payload(
+    model: str,
+    payload: Dict[str, Any],
+    *,
+    verbose: bool = False,
+) -> tuple[str | None, Response | None]:  # -> (service_tier, None) on success, (None, 400 JSON error response) on failure
+    outcome = resolve_service_tier(
+        model,
+        request_fast_mode=payload.get("fast_mode"),
+        request_service_tier=payload.get("service_tier"),
+        server_fast_mode=bool(current_app.config.get("FAST_MODE")),
+    )
+    if outcome.warning_message and verbose:
+        print(f"[FastMode] {outcome.warning_message}")
+    if not outcome.error_message:
+        return outcome.service_tier, None
+    failure = {"error": {"message": outcome.error_message}}
+    if verbose:
+        _log_json("OUT POST service_tier resolution", failure)
+    resp = make_response(jsonify(failure), 400)
+    for header, value in build_cors_headers().items():
+        resp.headers.setdefault(header, value)
+    return None, resp
+
+
+@openai_bp.route("/v1/chat/completions", methods=["POST"])
+def chat_completions() -> Response:
+    verbose = bool(current_app.config.get("VERBOSE"))
+    verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+    reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
+
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/chat/completions\n" + raw)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        try:
+            payload = json.loads(raw.replace("\r", "").replace("\n", ""))
+        except Exception:
+            err = {"error": {"message": "Invalid JSON body"}}
+            if verbose:
+                _log_json("OUT POST /v1/chat/completions", err)
+            return jsonify(err), 400
+
+    requested_model = payload.get("model")
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
+    messages = payload.get("messages")
+    if messages is None and isinstance(payload.get("prompt"), str):
+        messages = [{"role": "user", "content": payload.get("prompt") or ""}]
+    if messages is None and isinstance(payload.get("input"), str):
+        messages = [{"role": "user", "content": payload.get("input") or ""}]
+    if messages is None:
+        messages = []
+    if not isinstance(messages, list):
+        err = {"error": {"message": "Request must include messages: []"}}
+        if verbose:
+            _log_json("OUT POST /v1/chat/completions", err)
+        return jsonify(err), 400
+
+    if isinstance(messages, list):
+        sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
+        if isinstance(sys_idx, int):
+            sys_msg = messages.pop(sys_idx)
+            content = sys_msg.get("content") if isinstance(sys_msg, dict) else ""
+            messages.insert(0, {"role": "user", "content": content})
+    is_stream = bool(payload.get("stream"))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
+
+    tools_responses = convert_tools_chat_to_responses(payload.get("tools"))
+    tool_choice = payload.get("tool_choice", "auto")
+    parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+    responses_tools_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+    extra_tools: List[Dict[str, Any]] = []
+    had_responses_tools = False
+    if isinstance(responses_tools_payload, list):
+        for _t in responses_tools_payload:
+            if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+                continue
+            if _t.get("type") not in ("web_search", "web_search_preview"):
+                err = {
+                    "error": {
+                        "message": "Only web_search/web_search_preview are supported in responses_tools",
+                        "code": "RESPONSES_TOOL_UNSUPPORTED",
+                    }
+                }
+                if verbose:
+                    _log_json("OUT POST /v1/chat/completions", err)
+                return jsonify(err), 400
+            extra_tools.append(_t)
+
+        if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+            responses_tool_choice = payload.get("responses_tool_choice")
+            if not (isinstance(responses_tool_choice, str) and responses_tool_choice == "none"):
+                extra_tools = [{"type": "web_search"}]
+
+        if extra_tools:
+            import json as _json
+            MAX_TOOLS_BYTES = 32768
+            try:
+                size = len(_json.dumps(extra_tools))
+            except Exception:
+                size = 0
+            if size > MAX_TOOLS_BYTES:
+                err = {"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}
+                if verbose:
+                    _log_json("OUT POST /v1/chat/completions", err)
+                return jsonify(err), 400
+            had_responses_tools = True
+            tools_responses = (tools_responses or []) + extra_tools
+
+    responses_tool_choice = payload.get("responses_tool_choice")
+    if isinstance(responses_tool_choice, str) and responses_tool_choice in ("auto", "none"):
+        tool_choice = responses_tool_choice
+
+    input_items = convert_chat_messages_to_responses_input(messages)
+    if not input_items and isinstance(payload.get("prompt"), str) and payload.get("prompt").strip():
+        input_items = [
+            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": payload.get("prompt")}]}
+        ]
+
+    model_reasoning = extract_reasoning_from_model_name(requested_model)
+    reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
+    service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+    if tier_error is not None:
+        return tier_error
+
+    upstream, error_resp = start_upstream_request(
+        model,
+        input_items,
+        instructions=_instructions_for_model(model),
+        tools=tools_responses,
+        tool_choice=tool_choice,
+        parallel_tool_calls=parallel_tool_calls,
+        reasoning_param=reasoning_param,
+        service_tier=service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/chat/completions", parsed)
+            except Exception:
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    created = int(time.time())
+    if upstream.status_code >= 400:
+        try:
+            raw = upstream.content
+            err_body = json.loads(raw.decode("utf-8", errors="ignore")) if raw else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        if had_responses_tools:
+            if verbose:
+                print("[Passthrough] Upstream rejected tools; retrying without extra tools (args redacted)")
+            base_tools_only = convert_tools_chat_to_responses(payload.get("tools"))
+            safe_choice = payload.get("tool_choice", "auto")
+            upstream2, err2 = start_upstream_request(
+                model,
+                input_items,
+                instructions=BASE_INSTRUCTIONS,
+                tools=base_tools_only,
+                tool_choice=safe_choice,
+                parallel_tool_calls=parallel_tool_calls,
+                reasoning_param=reasoning_param,
+                service_tier=service_tier,
+            )
+            record_rate_limits_from_response(upstream2)
+            if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+                upstream = upstream2
+            else:
+                err = {
+                    "error": {
+                        "message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
+                        "code": "RESPONSES_TOOLS_REJECTED",
+                    }
+                }
+                if verbose:
+                    _log_json("OUT POST /v1/chat/completions", err)
+                return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
+        else:
+            if verbose:
+                print("Upstream error status=", upstream.status_code)
+            err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
+            if verbose:
+                _log_json("OUT POST /v1/chat/completions", err)
+            return jsonify(err), upstream.status_code
+
+    if is_stream:
+        if verbose:
+            print("OUT POST /v1/chat/completions (streaming response)")
+        stream_iter = sse_translate_chat(
+            upstream,
+            requested_model or model,
+            created,
+            verbose=verbose_obfuscation,
+            vlog=print if verbose_obfuscation else None,
+            reasoning_compat=reasoning_compat,
+            include_usage=include_usage,
+        )
+        stream_iter = _wrap_stream_logging("STREAM OUT /v1/chat/completions", stream_iter, verbose)
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    full_text = ""
+    reasoning_summary_text = ""
+    reasoning_full_text = ""
+    response_id = "chatcmpl"
+    tool_calls: List[Dict[str, Any]] = []
+    error_message: str | None = None
+    usage_obj: Dict[str, int] | None = None
+
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        for raw in upstream.iter_lines(decode_unicode=False):
+            if not raw:
+                continue
+            line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            kind = evt.get("type")
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_summary_text.delta":
+                reasoning_summary_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_text.delta":
+                reasoning_full_text += evt.get("delta") or ""
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and item.get("type") == "function_call":
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ""
+                    args = item.get("arguments") or ""
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        tool_calls.append(
+                            {
+                                "id": call_id,
+                                "type": "function",
+                                "function": {"name": name, "arguments": args},
+                            }
+                        )
+            elif kind == "response.failed":
+                error_message = evt.get("response", {}).get("error", {}).get("message", "response.failed")
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+
+    if error_message:
+        resp = make_response(jsonify({"error": {"message": error_message}}), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    message: Dict[str, Any] = {"role": "assistant", "content": full_text if full_text else None}
+    if tool_calls:
+        message["tool_calls"] = tool_calls
+    message = apply_reasoning_to_message(message, reasoning_summary_text, reasoning_full_text, reasoning_compat)
+    completion = {
+        "id": response_id or "chatcmpl",
+        "object": "chat.completion",
+        "created": created,
+        "model": requested_model or model,
+        "choices": [
+            {
+                "index": 0,
+                "message": message,
+                "finish_reason": "stop",
+            }
+        ],
+        **({"usage": usage_obj} if usage_obj else {}),
+    }
+    if verbose:
+        _log_json("OUT POST /v1/chat/completions", completion)
+    resp = make_response(jsonify(completion), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@openai_bp.route("/v1/completions", methods=["POST"])
+def completions() -> Response:
+    """Handle ``POST /v1/completions`` (legacy text-completion shape).
+
+    The body's ``prompt`` (a string, or a list whose string elements are
+    concatenated) is wrapped in a single user message and sent upstream with
+    ``start_upstream_request``. When ``stream`` is truthy the upstream events
+    are translated by ``sse_translate_text`` and relayed; otherwise the
+    ``response.output_text.delta`` events are accumulated into one
+    ``text_completion`` object.
+
+    Returns:
+        A Flask response (or a ``(response, status)`` tuple on early errors):
+        400 for an unparsable or non-object JSON body, the upstream status for
+        upstream HTTP errors, 502 when the upstream stream reports
+        ``response.failed``, otherwise the completion or event stream with the
+        upstream status code.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/completions\n" + raw)
+        except Exception:
+            # Request logging is best-effort; a console error must not fail the call.
+            pass
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": {"message": "Invalid JSON body"}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        return jsonify(err), 400
+
+    if not isinstance(payload, dict):
+        # Valid JSON that is not an object (list, number, ...) would make every
+        # ``payload.get`` below raise; reject it the way /v1/responses does.
+        err = {"error": {"message": "Request body must be a JSON object"}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        return jsonify(err), 400
+
+    requested_model = payload.get("model")
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
+    prompt = payload.get("prompt")
+    if isinstance(prompt, list):
+        # Non-string list elements contribute nothing to the joined prompt.
+        prompt = "".join([p if isinstance(p, str) else "" for p in prompt])
+    if not isinstance(prompt, str):
+        prompt = payload.get("suffix") or ""
+    stream_req = bool(payload.get("stream", False))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
+
+    messages = [{"role": "user", "content": prompt or ""}]
+    input_items = convert_chat_messages_to_responses_input(messages)
+
+    # An explicit ``reasoning`` object in the body wins over hints derived from the model name.
+    model_reasoning = extract_reasoning_from_model_name(requested_model)
+    reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
+    service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+    if tier_error is not None:
+        return tier_error
+    upstream, error_resp = start_upstream_request(
+        model,
+        input_items,
+        instructions=_instructions_for_model(model),
+        reasoning_param=reasoning_param,
+        service_tier=service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/completions", parsed)
+            except Exception:
+                # Logging only; the prepared error response is returned regardless.
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    created = int(time.time())
+    if upstream.status_code >= 400:
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        return jsonify(err), upstream.status_code
+
+    if stream_req:
+        if verbose:
+            print("OUT POST /v1/completions (streaming response)")
+        stream_iter = sse_translate_text(
+            upstream,
+            requested_model or model,
+            created,
+            verbose=verbose_obfuscation,
+            vlog=(print if verbose_obfuscation else None),
+            include_usage=include_usage,
+        )
+        stream_iter = _wrap_stream_logging("STREAM OUT /v1/completions", stream_iter, verbose)
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    full_text = ""
+    response_id = "cmpl"
+    usage_obj: Dict[str, int] | None = None
+    error_message: str | None = None
+
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        """Map an event's ``response.usage`` token counts to completion-style keys, or None."""
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        for raw_line in upstream.iter_lines(decode_unicode=False):
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            if not isinstance(evt, dict):
+                # e.g. ``data: 123`` parses as JSON but is not an event object.
+                continue
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
+            kind = evt.get("type")
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.failed":
+                # Surface upstream failures instead of returning an empty completion.
+                failed = evt.get("response")
+                error_info = failed.get("error") if isinstance(failed, dict) else None
+                detail = error_info.get("message") if isinstance(error_info, dict) else None
+                error_message = detail or "response.failed"
+                break
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+
+    if error_message:
+        err = {"error": {"message": error_message}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        resp = make_response(jsonify(err), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    completion = {
+        "id": response_id or "cmpl",
+        "object": "text_completion",
+        "created": created,
+        "model": requested_model or model,
+        "choices": [
+            {"index": 0, "text": full_text, "finish_reason": "stop", "logprobs": None}
+        ],
+        **({"usage": usage_obj} if usage_obj else {}),
+    }
+    if verbose:
+        _log_json("OUT POST /v1/completions", completion)
+    resp = make_response(jsonify(completion), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@openai_bp.route("/v1/responses", methods=["POST"])
+def responses_create() -> Response:
+    """Handle ``POST /v1/responses`` by forwarding the request upstream.
+
+    The JSON object body is normalized with ``normalize_responses_payload`` and
+    prepared with ``prepare_responses_request_for_session`` (with
+    ``allow_previous_response_id=False``). The upstream call always sets
+    ``stream`` to True; if the client asked for streaming the bytes are
+    relayed, otherwise the result is returned as a single JSON response.
+
+    Returns:
+        A Flask response (or ``(response, status)`` tuple on early errors):
+        400 for an invalid or non-object body, ``exc.status_code`` for a
+        ``ResponsesRequestError``, the upstream status for upstream HTTP
+        errors, 502 when aggregation yields an error or no response object.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/responses\n" + raw)
+        except Exception:
+            # Request logging is best-effort.
+            pass
+
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": {"message": "Invalid JSON body"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), 400
+
+    if not isinstance(payload, dict):
+        err = {"error": {"message": "Request body must be a JSON object"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), 400
+
+    try:
+        normalized = normalize_responses_payload(
+            payload,
+            config=current_app.config,
+            client_session_id=extract_client_session_id(request.headers),
+        )
+    except ResponsesRequestError as exc:
+        # Validation failures carry their own HTTP status and optional error code.
+        err: Dict[str, Any] = {"error": {"message": str(exc)}}
+        if exc.code:
+            err["error"]["code"] = exc.code
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), exc.status_code
+
+    if normalized.service_tier_resolution.warning_message and verbose:
+        print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+
+    prepared = prepare_responses_request_for_session(
+        normalized.session_id,
+        normalized.payload,
+        allow_previous_response_id=False,
+    )
+    # Remember what the client asked for; the upstream request itself always streams.
+    stream_req = bool(prepared.payload.get("stream", False))
+    upstream_payload = dict(prepared.payload)
+    upstream_payload["stream"] = True
+    upstream, error_resp = start_upstream_raw_request(
+        upstream_payload,
+        session_id=normalized.session_id,
+        stream=True,
+    )
+    if error_resp is not None:
+        # No usable upstream response: drop the reuse state recorded for this session.
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/responses", parsed)
+            except Exception:
+                # Logging only; the prepared error response is returned regardless.
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    if upstream.status_code >= 400:
+        # Pass the upstream error body through, falling back to its raw text.
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"error": {"message": upstream.text}}
+        except Exception:
+            err_body = {"error": {"message": upstream.text or "Upstream error"}}
+        finally:
+            upstream.close()
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            _log_json("OUT POST /v1/responses", err_body)
+        resp = make_response(jsonify(err_body), upstream.status_code)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    if stream_req:
+        if verbose:
+            print("OUT POST /v1/responses (streaming response)")
+        # Each upstream event is also reported to the session tracker via on_event.
+        stream_iter = _wrap_stream_logging(
+            "STREAM OUT /v1/responses",
+            stream_upstream_bytes(
+                upstream,
+                on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+            ),
+            verbose,
+        )
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    # Non-streaming client: upstream may have answered with plain JSON instead of events.
+    content_type = upstream.headers.get("Content-Type", "")
+    if "application/json" in content_type.lower():
+        try:
+            body = upstream.json()
+        except Exception:
+            body = None
+        finally:
+            upstream.close()
+        if isinstance(body, dict):
+            note_responses_final_response(normalized.session_id, body)
+            if verbose:
+                _log_json("OUT POST /v1/responses", body)
+            resp = make_response(jsonify(body), upstream.status_code)
+            for k, v in build_cors_headers().items():
+                resp.headers.setdefault(k, v)
+            return resp
+        # NOTE(review): a JSON content type whose body is not an object falls
+        # through to the aggregation below with ``upstream`` already closed.
+
+    # Presumably collapses the event stream into one response object — confirm
+    # against aggregate_response_from_sse.
+    response_obj, error_obj = aggregate_response_from_sse(
+        upstream,
+        on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+    )
+    if error_obj is not None:
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            _log_json("OUT POST /v1/responses", error_obj)
+        resp = make_response(jsonify(error_obj), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    if response_obj is None:
+        clear_responses_reuse_state(normalized.session_id)
+        err = {"error": {"message": "Upstream response stream did not contain a completed response object"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        resp = make_response(jsonify(err), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    if verbose:
+        _log_json("OUT POST /v1/responses", response_obj)
+    resp = make_response(jsonify(response_obj), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@openai_bp.route("/v1/models", methods=["GET"])
+def list_models() -> Response:
+    """Return the public model ids wrapped in an ``{"object": "list"}`` envelope.
+
+    The ``EXPOSE_REASONING_MODELS`` config flag is passed to
+    ``list_public_models`` as ``expose_reasoning_models``.
+    """
+    include_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
+    entries = []
+    for model_id in list_public_models(expose_reasoning_models=include_variants):
+        entries.append({"id": model_id, "object": "model", "owned_by": "owner"})
+    response = make_response(jsonify({"object": "list", "data": entries}), 200)
+    for header, value in build_cors_headers().items():
+        # setdefault keeps any header already present on the response.
+        response.headers.setdefault(header, value)
+    return response
diff --git a/build/lib/chatmock/session.py b/build/lib/chatmock/session.py
new file mode 100644
index 0000000000000000000000000000000000000000..705a50cdf94586cf0af4e8c98c418a1a07c3b31f
--- /dev/null
+++ b/build/lib/chatmock/session.py
@@ -0,0 +1,312 @@
+from __future__ import annotations
+
+import copy
+import hashlib
+import json
+import threading
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, Dict, List
+
+
+# Held by the public functions in this module while they read or mutate the
+# registries below.
+_LOCK = threading.Lock()
+# Prompt-prefix fingerprint -> generated session UUID (filled by _remember).
+_FINGERPRINT_TO_UUID: Dict[str, str] = {}
+# Fingerprints in insertion order; the oldest is evicted first.
+_ORDER: List[str] = []
+# Size cap applied to both registries before the oldest entry is dropped.
+_MAX_ENTRIES = 10000
+# Session id -> tracking state for the Responses endpoint.
+_RESPONSES_SESSION_STATE: Dict[str, "_ResponsesSessionState"] = {}
+# Session ids in insertion order; the oldest is evicted first.
+_RESPONSES_ORDER: List[str] = []
+
+
+@dataclass(frozen=True)
+class PreparedResponsesRequest:
+    """Immutable result of ``prepare_responses_request_for_session``."""
+
+    # Payload to send upstream (a deep copy of the caller's payload, possibly
+    # with ``input`` trimmed and ``previous_response_id`` set).
+    payload: Dict[str, Any]
+    # Session id the request was prepared for.
+    session_id: str
+
+
+@dataclass
+class _ResponsesSessionState:
+    """Per-session bookkeeping for the last completed and the in-flight request."""
+
+    # Full payload of the most recent request whose completion was recorded.
+    last_request_payload: Dict[str, Any] | None = None
+    # Response id recorded for that request.
+    last_response_id: str | None = None
+    # Output items of that response, with ``reasoning`` items filtered out.
+    last_response_items: List[Dict[str, Any]] = field(default_factory=list)
+    # Full payload of the request currently awaiting its completion event.
+    inflight_request_payload: Dict[str, Any] | None = None
+    # When True, a ``response.completed`` event promotes the in-flight data to ``last_*``.
+    inflight_track_result: bool = False
+    # Id taken from the ``response.created`` event of the in-flight request.
+    inflight_response_id: str | None = None
+    # Items collected from ``response.output_item.done`` events so far.
+    inflight_response_items: List[Dict[str, Any]] = field(default_factory=list)
+
+
+def _canonicalize_first_user_message(input_items: List[Dict[str, Any]]) -> Dict[str, Any] | None:
+    """
+    Return a normalized copy of the first user message that has usable content.
+
+    Only ``input_text`` parts with non-empty string ``text`` and ``input_image``
+    parts with non-empty string ``image_url`` are kept, and only those fields.
+    User messages that end up with no kept parts are passed over. Returns None
+    when no message qualifies.
+    """
+    for candidate in input_items:
+        is_user_message = (
+            isinstance(candidate, dict)
+            and candidate.get("type") == "message"
+            and candidate.get("role") == "user"
+        )
+        if not is_user_message:
+            continue
+        parts = candidate.get("content")
+        if not isinstance(parts, list):
+            continue
+        kept: List[Dict[str, Any]] = []
+        for part in parts:
+            if not isinstance(part, dict):
+                continue
+            kind = part.get("type")
+            if kind == "input_text":
+                field_name = "text"
+            elif kind == "input_image":
+                field_name = "image_url"
+            else:
+                continue
+            value = part.get(field_name)
+            if isinstance(value, str) and value:
+                kept.append({"type": kind, field_name: value})
+        if kept:
+            return {"type": "message", "role": "user", "content": kept}
+    return None
+
+
+def canonicalize_prefix(instructions: str | None, input_items: List[Dict[str, Any]]) -> str:
+    """Serialize the prompt prefix as compact JSON with sorted keys.
+
+    The prefix holds the stripped ``instructions`` (when a non-blank string)
+    and the canonical first user message (when one exists); either key is
+    omitted if absent, so the result may be ``"{}"``.
+    """
+    trimmed = instructions.strip() if isinstance(instructions, str) else ""
+    leading_user = _canonicalize_first_user_message(input_items)
+
+    prefix: Dict[str, Any] = {}
+    if trimmed:
+        prefix["instructions"] = trimmed
+    if leading_user is not None:
+        prefix["first_user_message"] = leading_user
+    return json.dumps(prefix, sort_keys=True, separators=(",", ":"))
+
+
+def _fingerprint(s: str) -> str:
+    """Return the hexadecimal SHA-256 digest of ``s`` encoded as UTF-8."""
+    hasher = hashlib.sha256()
+    hasher.update(s.encode("utf-8"))
+    return hasher.hexdigest()
+
+
+def _remember(fp: str, sid: str) -> None:
+    """Register ``fp -> sid`` unless ``fp`` is already known.
+
+    After inserting, the oldest fingerprint is evicted once more than
+    ``_MAX_ENTRIES`` are tracked. Does no locking of its own.
+    """
+    if fp not in _FINGERPRINT_TO_UUID:
+        _FINGERPRINT_TO_UUID[fp] = sid
+        _ORDER.append(fp)
+        if len(_ORDER) > _MAX_ENTRIES:
+            evicted = _ORDER.pop(0)
+            _FINGERPRINT_TO_UUID.pop(evicted, None)
+
+
+def _remember_responses_session(session_id: str) -> _ResponsesSessionState:
+    """Return the state object for ``session_id``, creating it on first use.
+
+    Creating a state may evict the oldest tracked session once more than
+    ``_MAX_ENTRIES`` exist. Does no locking of its own.
+    """
+    existing = _RESPONSES_SESSION_STATE.get(session_id)
+    if existing is not None:
+        return existing
+
+    fresh = _ResponsesSessionState()
+    _RESPONSES_SESSION_STATE[session_id] = fresh
+    _RESPONSES_ORDER.append(session_id)
+    if len(_RESPONSES_ORDER) > _MAX_ENTRIES:
+        evicted = _RESPONSES_ORDER.pop(0)
+        _RESPONSES_SESSION_STATE.pop(evicted, None)
+    return fresh
+
+
+def _request_without_input(payload: Dict[str, Any]) -> Dict[str, Any]:
+    """Deep-copy ``payload`` with ``input`` emptied and ``previous_response_id`` removed.
+
+    The result is what two requests are compared on to decide whether they
+    differ only in their input.
+    """
+    stripped = copy.deepcopy(payload)
+    stripped.pop("previous_response_id", None)
+    stripped["input"] = []
+    return stripped
+
+
+def _input_list(payload: Dict[str, Any]) -> List[Dict[str, Any]] | None:
+    """Return deep copies of the dict entries of ``payload["input"]``.
+
+    Returns None when ``input`` is missing or not a list; non-dict entries of
+    a list are dropped.
+    """
+    entries = payload.get("input")
+    if isinstance(entries, list):
+        copied = copy.deepcopy(entries)
+        return [entry for entry in copied if isinstance(entry, dict)]
+    return None
+
+
+def _conversation_output_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return deep copies of the dict items whose ``type`` is not ``"reasoning"``."""
+    return [
+        copy.deepcopy(entry)
+        for entry in items
+        if isinstance(entry, dict) and entry.get("type") != "reasoning"
+    ]
+
+
+def _clear_reuse_state(state: _ResponsesSessionState) -> None:
+    """Reset every field of ``state``: the recorded last exchange and the in-flight data."""
+    # Recorded last exchange.
+    state.last_request_payload = state.last_response_id = None
+    state.last_response_items = []
+    # In-flight request.
+    state.inflight_request_payload = state.inflight_response_id = None
+    state.inflight_track_result = False
+    state.inflight_response_items = []
+
+
+def _clear_inflight(state: _ResponsesSessionState) -> None:
+    """Reset only the in-flight fields of ``state``; the ``last_*`` fields are untouched."""
+    state.inflight_request_payload = state.inflight_response_id = None
+    state.inflight_track_result = False
+    state.inflight_response_items = []
+
+
+def ensure_session_id(
+    instructions: str | None,
+    input_items: List[Dict[str, Any]],
+    client_supplied: str | None = None,
+) -> str:
+    """Return the session id to use for a request.
+
+    A non-blank ``client_supplied`` string wins (stripped). Otherwise the
+    canonical prompt prefix is fingerprinted; a known fingerprint returns its
+    stored id, and an unknown one gets a fresh UUID4 that is remembered.
+    """
+    supplied = client_supplied.strip() if isinstance(client_supplied, str) else ""
+    if supplied:
+        return supplied
+
+    fingerprint = _fingerprint(canonicalize_prefix(instructions, input_items))
+    with _LOCK:
+        try:
+            return _FINGERPRINT_TO_UUID[fingerprint]
+        except KeyError:
+            new_id = str(uuid.uuid4())
+            _remember(fingerprint, new_id)
+            return new_id
+
+
+def prepare_responses_request_for_session(
+    session_id: str,
+    payload: Dict[str, Any],
+    *,
+    allow_previous_response_id: bool = True,
+) -> PreparedResponsesRequest:
+    """Build the outbound payload for ``session_id`` and mark it as in flight.
+
+    If the caller already set a non-blank ``previous_response_id``, the
+    session's reuse state is cleared and the payload is passed through
+    unchanged. Otherwise, when ``allow_previous_response_id`` is true, the
+    request equals the last recorded one apart from ``input``, and its input
+    begins with the previous input followed by the previous output items, only
+    the remaining input is sent together with the recorded response id.
+
+    The caller's ``payload`` is never mutated; deep copies are used throughout.
+    """
+    snapshot = copy.deepcopy(payload)
+    outbound = copy.deepcopy(payload)
+    prior_id = snapshot.get("previous_response_id")
+    caller_chained = isinstance(prior_id, str) and bool(prior_id.strip())
+
+    with _LOCK:
+        state = _remember_responses_session(session_id)
+
+        if caller_chained:
+            _clear_reuse_state(state)
+        else:
+            current_input = _input_list(snapshot)
+            # Short-circuit order matters: later operands assume the earlier ones held.
+            may_chain = (
+                allow_previous_response_id
+                and state.last_request_payload is not None
+                and state.last_response_id
+                and current_input is not None
+                and _request_without_input(state.last_request_payload) == _request_without_input(snapshot)
+            )
+            if may_chain:
+                earlier_input = _input_list(state.last_request_payload)
+                expected: List[Dict[str, Any]] = list(earlier_input) if earlier_input is not None else []
+                expected += copy.deepcopy(state.last_response_items)
+                cut = len(expected)
+                if current_input[:cut] == expected and cut <= len(current_input):
+                    outbound["input"] = copy.deepcopy(current_input[cut:])
+                    outbound["previous_response_id"] = state.last_response_id
+
+            state.inflight_request_payload = snapshot
+            state.inflight_track_result = True
+            state.inflight_response_id = None
+            state.inflight_response_items = []
+
+    return PreparedResponsesRequest(payload=outbound, session_id=session_id)
+
+
+def note_responses_stream_event(session_id: str, event: Dict[str, Any]) -> None:
+    """Fold one upstream stream event into the session's reuse bookkeeping.
+
+    ``response.created`` stores the in-flight response id,
+    ``response.output_item.done`` collects the finished item, and
+    ``response.completed`` promotes the in-flight data to the "last" slot (or
+    wipes that slot when nothing usable was tracked). ``response.failed`` and
+    ``error`` call ``_clear_reuse_state``. Invalid arguments, unknown
+    sessions and every other event type are ignored.
+    """
+    if not (isinstance(session_id, str) and session_id.strip()):
+        return
+    if not isinstance(event, dict):
+        return
+
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+        kind = event.get("type")
+        response = event.get("response")
+        if kind == "response.created":
+            if isinstance(response, dict) and isinstance(response.get("id"), str):
+                state.inflight_response_id = response.get("id")
+        elif kind == "response.output_item.done":
+            item = event.get("item")
+            if isinstance(item, dict):
+                state.inflight_response_items.append(copy.deepcopy(item))
+        elif kind == "response.completed":
+            response_id = None
+            response_items: List[Dict[str, Any]] = copy.deepcopy(state.inflight_response_items)
+            if isinstance(response, dict):
+                if isinstance(response.get("id"), str):
+                    response_id = response.get("id")
+                output = response.get("output")
+                if isinstance(output, list) and output:
+                    # A non-empty final output list replaces the streamed items.
+                    response_items = [copy.deepcopy(entry) for entry in output if isinstance(entry, dict)]
+            response_id = response_id or state.inflight_response_id
+            if state.inflight_track_result and state.inflight_request_payload is not None and response_id:
+                state.last_request_payload = copy.deepcopy(state.inflight_request_payload)
+                state.last_response_id = response_id
+                state.last_response_items = _conversation_output_items(response_items)
+            else:
+                state.last_request_payload = None
+                state.last_response_id = None
+                state.last_response_items = []
+            _clear_inflight(state)
+        elif kind in ("response.failed", "error"):
+            _clear_reuse_state(state)
+
+
+def note_responses_final_response(session_id: str, response_obj: Dict[str, Any]) -> None:
+    """Record a complete (non-streamed) response object for ``session_id``.
+
+    When tracking is on, a request is in flight and ``id`` is a non-empty
+    string, that request and the dict entries of ``output`` become the "last"
+    slot; otherwise the slot is wiped. ``_clear_inflight`` runs either way.
+    """
+    if not (isinstance(session_id, str) and session_id.strip()):
+        return
+    if not isinstance(response_obj, dict):
+        return
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+        raw_id, raw_output = response_obj.get("id"), response_obj.get("output")
+        response_id = raw_id if isinstance(raw_id, str) else None
+        items = [copy.deepcopy(entry) for entry in raw_output if isinstance(entry, dict)] if isinstance(raw_output, list) else []
+        usable = bool(state.inflight_track_result and state.inflight_request_payload is not None and response_id)
+        state.last_request_payload = copy.deepcopy(state.inflight_request_payload) if usable else None
+        state.last_response_id = response_id if usable else None
+        state.last_response_items = _conversation_output_items(items) if usable else []
+        _clear_inflight(state)
+
+
+def clear_responses_reuse_state(session_id: str) -> None:
+    """Run ``_clear_reuse_state`` for a known session; otherwise do nothing."""
+    if not (isinstance(session_id, str) and session_id.strip()):
+        return
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is not None:
+            _clear_reuse_state(state)
+
+
+def reset_session_state() -> None:
+    """Empty the four module-level session registries while holding ``_LOCK``."""
+    registries = (_FINGERPRINT_TO_UUID, _ORDER, _RESPONSES_SESSION_STATE, _RESPONSES_ORDER)
+    with _LOCK:
+        for registry in registries:
+            registry.clear()
diff --git a/build/lib/chatmock/transform.py b/build/lib/chatmock/transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c611fb10adc27c04e630c2cd5f493c6a655bf89
--- /dev/null
+++ b/build/lib/chatmock/transform.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, List
+
+
+def to_data_url(image_str: str) -> str:
+    """Return ``image_str`` as a URL: data:image/ and http(s) URLs pass through
+    (stripped), anything else is treated as base64 and wrapped in a data URL.
+    The MIME type is sniffed from the base64 prefix (JPEG, GIF, WebP) and
+    defaults to PNG; empty or non-string input is returned unchanged."""
+    if not isinstance(image_str, str) or not image_str:
+        return image_str
+    s = image_str.strip()
+    if s.startswith(("data:image/", "http://", "https://")):
+        return s
+    b64 = s.replace("\n", "").replace("\r", "")
+    # Base64 of the magic bytes: FF D8 FF (JPEG), "GIF8" (GIF), "RIFF" (WebP container).
+    kind = "image/png"
+    for prefix, mime in (("/9j/", "image/jpeg"), ("R0lGOD", "image/gif"), ("UklGR", "image/webp")):
+        if b64.startswith(prefix):
+            kind = mime
+    return f"data:{kind};base64,{b64}"
+
+
+def convert_ollama_messages(
+    messages: List[Dict[str, Any]] | None, top_images: List[str] | None
+) -> List[Dict[str, Any]]:
+    """Convert Ollama-style chat messages into OpenAI-style chat messages.
+
+    Text and per-message ``images`` become content parts, assistant
+    ``tool_calls`` are normalised (missing ids become ``ollama_call_<n>``),
+    and a ``tool`` message without an id takes the oldest call id that has
+    not been answered yet. An id that a ``tool`` message states explicitly
+    is removed from that queue so it cannot be assigned a second time.
+    ``top_images`` are appended to the last ``user`` message, or to a new one
+    when there is none. Non-dict messages are skipped.
+    Returns the converted message list.
+    """
+    out: List[Dict[str, Any]] = []
+    pending_call_ids: List[str] = []
+    call_counter = 0
+    for m in messages if isinstance(messages, list) else []:
+        if not isinstance(m, dict):
+            continue
+        role = m.get("role") or "user"
+        nm: Dict[str, Any] = {"role": role}
+
+        content = m.get("content")
+        images = m.get("images") if isinstance(m.get("images"), list) else []
+        parts: List[Dict[str, Any]] = []
+        if isinstance(content, list):
+            for p in content:
+                if isinstance(p, dict) and p.get("type") == "text" and isinstance(p.get("text"), str):
+                    parts.append({"type": "text", "text": p.get("text")})
+        elif isinstance(content, str):
+            parts.append({"type": "text", "text": content})
+        for img in images:
+            url = to_data_url(img)
+            if isinstance(url, str) and url:
+                parts.append({"type": "image_url", "image_url": {"url": url}})
+        if parts:
+            nm["content"] = parts
+
+        if role == "assistant" and isinstance(m.get("tool_calls"), list):
+            tcs = []
+            for tc in m.get("tool_calls"):
+                if not isinstance(tc, dict):
+                    continue
+                fn = tc.get("function") if isinstance(tc.get("function"), dict) else {}
+                name = fn.get("name") if isinstance(fn.get("name"), str) else None
+                args = fn.get("arguments")
+                if name is None:
+                    continue
+                call_id = tc.get("id") or tc.get("call_id")
+                if not isinstance(call_id, str) or not call_id:
+                    call_counter += 1
+                    call_id = f"ollama_call_{call_counter}"
+                pending_call_ids.append(call_id)
+                if not isinstance(args, str):
+                    args = json.dumps(args) if isinstance(args, dict) else "{}"
+                tcs.append({"id": call_id, "type": "function", "function": {"name": name, "arguments": args}})
+            if tcs:
+                nm["tool_calls"] = tcs
+
+        if role == "tool":
+            tci = m.get("tool_call_id") or m.get("id")
+            if isinstance(tci, str) and tci:
+                # An explicitly answered call must not be handed out again to a
+                # later tool message that carries no id.
+                if tci in pending_call_ids:
+                    pending_call_ids.remove(tci)
+            elif pending_call_ids:
+                tci = pending_call_ids.pop(0)
+            if isinstance(tci, str) and tci:
+                nm["tool_call_id"] = tci
+
+        out.append(nm)
+
+    if isinstance(top_images, list) and top_images:
+        attach_to = next((msg for msg in reversed(out) if msg.get("role") == "user"), None)
+        if attach_to is None:
+            attach_to = {"role": "user", "content": []}
+            out.append(attach_to)
+        attach_to.setdefault("content", [])
+        for img in top_images:
+            url = to_data_url(img)
+            if isinstance(url, str) and url:
+                attach_to["content"].append({"type": "image_url", "image_url": {"url": url}})
+    return out
+
+
+def normalize_ollama_tools(tools: List[Dict[str, Any]] | None) -> List[Dict[str, Any]]:
+    """Coerce Ollama tool definitions into ``{"type": "function", ...}`` entries.
+
+    Entries with a ``function`` dict take name, description and (dict)
+    parameters from it; flat entries with a top-level ``name`` get an empty
+    object schema. Non-dict entries and entries without a usable name are
+    dropped, and a non-list ``tools`` value yields ``[]``.
+    """
+    normalized: List[Dict[str, Any]] = []
+    if not isinstance(tools, list):
+        return normalized
+    for tool in tools:
+        if not isinstance(tool, dict):
+            continue
+        spec = tool.get("function")
+        if isinstance(spec, dict):
+            # Nested form: everything is read from the "function" mapping.
+            name = spec.get("name") if isinstance(spec.get("name"), str) else None
+            description = spec.get("description") or ""
+            parameters = spec.get("parameters")
+            if not isinstance(parameters, dict):
+                parameters = {"type": "object", "properties": {}}
+        else:
+            # Flat form: only name and description are read from the entry.
+            name = tool.get("name") if isinstance(tool.get("name"), str) else None
+            description = tool.get("description") or ""
+            parameters = {"type": "object", "properties": {}}
+        if not name:
+            continue
+        normalized.append(
+            {
+                "type": "function",
+                "function": {"name": name, "description": description, "parameters": parameters},
+            }
+        )
+    return normalized
+
diff --git a/build/lib/chatmock/upstream.py b/build/lib/chatmock/upstream.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba995cb25b08a74e8f542d5eb824693fb35196c4
--- /dev/null
+++ b/build/lib/chatmock/upstream.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Dict, List, Tuple
+from urllib.parse import urlparse, urlunparse
+
+import requests
+from flask import Response, current_app, jsonify, make_response
+
+from .config import CHATGPT_RESPONSES_URL
+from .http import build_cors_headers
+from .model_registry import normalize_model_name
+from .session import ensure_session_id
+from flask import request as flask_request
+from .utils import get_effective_chatgpt_auth
+
+
+def _log_json(prefix: str, payload: Any) -> None:
+    """Print ``prefix`` plus ``payload`` as indented JSON, else as plain text; ``Exception`` is swallowed."""
+    for render in (lambda: json.dumps(payload, indent=2, ensure_ascii=False), lambda: payload):
+        try:
+            print(f"{prefix}\n{render()}")
+            return
+        except Exception:
+            continue
+
+def start_upstream_request(
+    model: str,
+    input_items: List[Dict[str, Any]],
+    *,
+    instructions: str | None = None,
+    tools: List[Dict[str, Any]] | None = None,
+    tool_choice: Any | None = None,
+    parallel_tool_calls: bool = False,
+    reasoning_param: Dict[str, Any] | None = None,
+    service_tier: str | None = None,
+):
+    """Assemble a streaming Responses payload and send it upstream.
+
+    Args:
+        model: Model name, placed in the payload as given.
+        input_items: Responses ``input`` items.
+        instructions: Passed through unchanged.
+        tools: Tool list; ``None`` or empty becomes ``[]``.
+        tool_choice: Kept when ``"auto"``, ``"none"`` or a dict, else ``"auto"``.
+        parallel_tool_calls: Coerced with ``bool``.
+        reasoning_param: Sent as ``reasoning`` unless ``None``; a dict also
+            adds ``include=["reasoning.encrypted_content"]``.
+        service_tier: Stripped and lower-cased when a non-blank string.
+
+    Returns:
+        The result of ``start_upstream_raw_request``, or
+        ``(None, <401 response>)`` when ChatGPT credentials are missing.
+    """
+    access_token, account_id = get_effective_chatgpt_auth()
+    if not (access_token and account_id):
+        message = "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first."
+        resp = make_response(jsonify({"error": {"message": message}}), 401)
+        for header, value in build_cors_headers().items():
+            resp.headers.setdefault(header, value)
+        return None, resp
+
+    try:
+        request_headers = flask_request.headers
+        client_session_id = request_headers.get("X-Session-Id") or request_headers.get("session_id") or None
+    except Exception:
+        # Header lookup failed; continue without a client-provided id.
+        client_session_id = None
+    session_id = ensure_session_id(instructions, input_items, client_session_id)
+
+    effective_tool_choice = tool_choice if tool_choice in ("auto", "none") or isinstance(tool_choice, dict) else "auto"
+    responses_payload = {
+        "model": model,
+        # NOTE(review): the previous conditional yielded `instructions` on both branches.
+        "instructions": instructions,
+        "input": input_items,
+        "tools": tools or [],
+        "tool_choice": effective_tool_choice,
+        "parallel_tool_calls": bool(parallel_tool_calls),
+        "store": False,
+        "stream": True,
+        "prompt_cache_key": session_id,
+    }
+
+    if isinstance(reasoning_param, dict):
+        responses_payload["include"] = ["reasoning.encrypted_content"]
+    if reasoning_param is not None:
+        responses_payload["reasoning"] = reasoning_param
+    if isinstance(service_tier, str) and service_tier.strip():
+        responses_payload["service_tier"] = service_tier.strip().lower()
+
+    return start_upstream_raw_request(responses_payload, session_id=session_id, stream=True)
+
+
+def build_upstream_headers(
+    access_token: str,
+    account_id: str,
+    session_id: str,
+    *,
+    accept: str = "text/event-stream",
+) -> Dict[str, str]:
+    """Return the HTTP headers sent with an upstream Responses request."""
+    headers: Dict[str, str] = {}
+    headers["Authorization"] = f"Bearer {access_token}"
+    headers["Content-Type"] = "application/json"
+    headers["Accept"] = accept
+    headers.update({"chatgpt-account-id": account_id, "OpenAI-Beta": "responses=experimental"})
+    headers["session_id"] = session_id
+    return headers
+
+
+def start_upstream_raw_request(
+    responses_payload: Dict[str, Any],
+    *,
+    session_id: str | None = None,
+    stream: bool = True,
+):
+    """POST ``responses_payload`` to ``CHATGPT_RESPONSES_URL``.
+
+    Returns ``(upstream, None)`` on success, or ``(None, error_response)``
+    with CORS headers applied: 401 when credentials are missing, 502 when
+    ``requests`` raises. The ``session_id`` header falls back to the
+    payload's ``prompt_cache_key`` and then to the current time in
+    milliseconds. The payload is logged when the app's ``VERBOSE`` config
+    value is truthy.
+    """
+
+    def _error(message: str, status: int):
+        resp = make_response(jsonify({"error": {"message": message}}), status)
+        for header, value in build_cors_headers().items():
+            resp.headers.setdefault(header, value)
+        return resp
+
+    def _usable(value: Any) -> bool:
+        return isinstance(value, str) and bool(value.strip())
+
+    access_token, account_id = get_effective_chatgpt_auth()
+    if not (access_token and account_id):
+        return None, _error("Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.", 401)
+
+    effective_session_id = session_id
+    if not _usable(effective_session_id):
+        cache_key = responses_payload.get("prompt_cache_key")
+        if _usable(cache_key):
+            effective_session_id = cache_key.strip()
+    if not _usable(effective_session_id):
+        # Last resort: a millisecond timestamp.
+        effective_session_id = str(int(time.time() * 1000))
+
+    try:
+        verbose = bool(current_app.config.get("VERBOSE"))
+    except Exception:
+        # Any failure while reading the app config simply disables logging.
+        verbose = False
+    if verbose:
+        _log_json("OUTBOUND >> ChatGPT Responses API payload", responses_payload)
+
+    accept = "text/event-stream" if stream else "application/json"
+    headers = build_upstream_headers(access_token, account_id, effective_session_id, accept=accept)
+    try:
+        upstream = requests.post(
+            CHATGPT_RESPONSES_URL,
+            headers=headers,
+            json=responses_payload,
+            stream=stream,
+            timeout=600,
+        )
+    except requests.RequestException as e:
+        return None, _error(f"Upstream ChatGPT request failed: {e}", 502)
+    return upstream, None
+
+
+def build_upstream_websocket_url() -> str:
+    """Return ``CHATGPT_RESPONSES_URL`` with http(s) swapped for ws(s).
+
+    Any other scheme is left as parsed.
+    """
+    parts = urlparse(CHATGPT_RESPONSES_URL)
+    ws_scheme = {"https": "wss", "http": "ws"}.get(parts.scheme.lower())
+    return urlunparse(parts._replace(scheme=ws_scheme) if ws_scheme else parts)
diff --git a/build/lib/chatmock/utils.py b/build/lib/chatmock/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..79703a5dc3c493e304169c6dcab720e75eb0ccdf
--- /dev/null
+++ b/build/lib/chatmock/utils.py
@@ -0,0 +1,874 @@
+from __future__ import annotations
+
+import base64
+import datetime
+import hashlib
+import json
+import os
+import secrets
+import sys
+from typing import Any, Dict, List, Optional, Tuple
+
+import requests
+
+from .config import CLIENT_ID_DEFAULT, OAUTH_TOKEN_URL
+
+
+def eprint(*args, **kwargs) -> None:  # print() wrapper that always writes to sys.stderr
+    print(*args, file=sys.stderr, **kwargs)
+
+
+def get_home_dir() -> str:
+    """Return $CHATGPT_LOCAL_HOME, else $CODEX_HOME, else the expanded ``~/.chatgpt-local``."""
+    configured = os.getenv("CHATGPT_LOCAL_HOME") or os.getenv("CODEX_HOME")
+    fallback = "~/.chatgpt-local"
+    return configured if configured else os.path.expanduser(fallback)
+
+
+def read_auth_file() -> Dict[str, Any] | None:
+    """Return the parsed JSON of the first readable ``auth.json``.
+
+    Directories are tried in order: $CHATGPT_LOCAL_HOME, $CODEX_HOME,
+    ``~/.chatgpt-local``, ``~/.codex``. A missing, unreadable or malformed
+    file moves on to the next directory; ``None`` means none was usable.
+    """
+    env_dirs = [os.getenv(name) for name in ("CHATGPT_LOCAL_HOME", "CODEX_HOME")]
+    home_dirs = [os.path.expanduser(p) for p in ("~/.chatgpt-local", "~/.codex")]
+    for directory in filter(None, env_dirs + home_dirs):
+        candidate = os.path.join(directory, "auth.json")
+        try:
+            with open(candidate, "r", encoding="utf-8") as handle:
+                return json.load(handle)
+        except Exception:
+            # FileNotFoundError included: any failure just tries the next one.
+            continue
+    return None
+
+
+def write_auth_file(auth: Dict[str, Any]) -> bool:
+    """Write ``auth`` as indented JSON to ``<home>/auth.json``; errors go to eprint and yield False."""
+    home = get_home_dir()
+    try:
+        os.makedirs(home, exist_ok=True)
+    except Exception as mkdir_error:
+        eprint(f"ERROR: unable to create auth home directory {home}: {mkdir_error}")
+        return False
+    try:
+        with open(os.path.join(home, "auth.json"), "w", encoding="utf-8") as handle:
+            if hasattr(os, "fchmod"):
+                os.fchmod(handle.fileno(), 0o600)  # owner read/write only
+            json.dump(auth, handle, indent=2)
+    except Exception as write_error:
+        eprint(f"ERROR: unable to write auth file: {write_error}")
+        return False
+    return True
+
+
+def parse_jwt_claims(token: str) -> Dict[str, Any] | None:
+    """Return the JSON payload of a three-segment JWT (signature not checked), else ``None``."""
+    segments = token.split(".") if token else []
+    if len(segments) != 3:
+        return None
+    try:
+        padded = segments[1] + "=" * (-len(segments[1]) % 4)
+        return json.loads(base64.urlsafe_b64decode(padded.encode()).decode())
+    except Exception:
+        return None
+
+
+def generate_pkce() -> "PkceCodes":
+    """Create a random PKCE verifier and its unpadded base64url SHA-256 challenge."""
+    from .models import PkceCodes
+
+    verifier = secrets.token_hex(64)
+    challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).rstrip(b"=")
+    return PkceCodes(code_verifier=verifier, code_challenge=challenge.decode())
+
+
+def convert_chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Translate chat-completions messages into Responses API ``input`` items.
+
+    * ``system`` messages are skipped.
+    * ``tool`` messages with a string id become ``function_call_output`` items.
+    * Assistant ``tool_calls`` of type ``function`` become ``function_call``
+      items, emitted before the message's own content.
+    * Remaining text / image parts form one ``message`` item; every role
+      other than ``assistant`` is sent as ``user``.
+
+    A falsy ``messages`` value yields ``[]`` and non-dict entries are skipped
+    rather than raising ``AttributeError`` on ``.get``.
+    """
+
+    def _normalize_image_data_url(url: str) -> str:
+        """Clean up a base64 ``data:image/`` URL; return ``url`` unchanged on any doubt."""
+        try:
+            if not isinstance(url, str) or not url.startswith("data:image/") or ";base64," not in url:
+                return url
+            header, data = url.split(",", 1)
+            try:
+                from urllib.parse import unquote
+
+                data = unquote(data)
+            except Exception:
+                pass
+            # Drop line breaks, map the URL-safe alphabet back and restore padding.
+            data = data.strip().replace("\n", "").replace("\r", "")
+            data = data.replace("-", "+").replace("_", "/")
+            pad = (-len(data)) % 4
+            if pad:
+                data = data + ("=" * pad)
+            try:
+                base64.b64decode(data, validate=True)
+            except Exception:
+                return url
+            return f"{header},{data}"
+        except Exception:
+            return url
+
+    input_items: List[Dict[str, Any]] = []
+    for message in messages or []:
+        if not isinstance(message, dict):
+            continue
+        role = message.get("role")
+        if role == "system":
+            continue
+
+        if role == "tool":
+            call_id = message.get("tool_call_id") or message.get("id")
+            if isinstance(call_id, str) and call_id:
+                content = message.get("content", "")
+                if isinstance(content, list):
+                    texts = []
+                    for part in content:
+                        if isinstance(part, dict):
+                            t = part.get("text") or part.get("content")
+                            if isinstance(t, str) and t:
+                                texts.append(t)
+                    content = "\n".join(texts)
+                if isinstance(content, str):
+                    input_items.append({"type": "function_call_output", "call_id": call_id, "output": content})
+            continue
+
+        if role == "assistant" and isinstance(message.get("tool_calls"), list):
+            for tc in message.get("tool_calls") or []:
+                if not isinstance(tc, dict):
+                    continue
+                if tc.get("type", "function") != "function":
+                    continue
+                call_id = tc.get("id") or tc.get("call_id")
+                fn = tc.get("function") if isinstance(tc.get("function"), dict) else {}
+                name = fn.get("name")
+                args = fn.get("arguments")
+                if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                    input_items.append({"type": "function_call", "name": name, "arguments": args, "call_id": call_id})
+
+        # Assistant text is echoed back as output_text; everything else is input_text.
+        kind = "output_text" if role == "assistant" else "input_text"
+        content = message.get("content", "")
+        content_items: List[Dict[str, Any]] = []
+        if isinstance(content, list):
+            for part in content:
+                if not isinstance(part, dict):
+                    continue
+                ptype = part.get("type")
+                if ptype == "text":
+                    text = part.get("text") or part.get("content") or ""
+                    if isinstance(text, str) and text:
+                        content_items.append({"type": kind, "text": text})
+                elif ptype == "image_url":
+                    image = part.get("image_url")
+                    url = image.get("url") if isinstance(image, dict) else image
+                    if isinstance(url, str) and url:
+                        content_items.append({"type": "input_image", "image_url": _normalize_image_data_url(url)})
+        elif isinstance(content, str) and content:
+            content_items.append({"type": kind, "text": content})
+
+        if not content_items:
+            continue
+        role_out = "assistant" if role == "assistant" else "user"
+        input_items.append({"type": "message", "role": role_out, "content": content_items})
+    return input_items
+
+
+def convert_tools_chat_to_responses(tools: Any) -> List[Dict[str, Any]]:
+    """Flatten chat-completions function tools into Responses API tool entries.
+
+    Only dict entries with ``type == "function"`` and a non-empty string
+    ``function.name`` are kept. A non-dict ``parameters`` value is replaced
+    by an empty object schema, a falsy description by ``""``, and ``strict``
+    is always ``False``. A non-list ``tools`` value yields ``[]``.
+    """
+    if not isinstance(tools, list):
+        return []
+    converted: List[Dict[str, Any]] = []
+    for tool in tools:
+        if not isinstance(tool, dict) or tool.get("type") != "function":
+            continue
+        spec = tool.get("function")
+        if not isinstance(spec, dict):
+            spec = {}
+        name, schema = spec.get("name"), spec.get("parameters")
+        if not isinstance(name, str) or not name:
+            continue
+        if not isinstance(schema, dict):
+            schema = {"type": "object", "properties": {}}
+        # The output entry is flat: name and parameters sit at the top level.
+        entry = {"type": "function", "name": name, "description": spec.get("description") or ""}
+        entry.update({"strict": False, "parameters": schema})
+        converted.append(entry)
+    return converted
+
+
+def load_chatgpt_tokens(ensure_fresh: bool = True) -> tuple[str | None, str | None, str | None]:
+    """Load ``(access_token, account_id, id_token)`` from the auth file.
+
+    Args:
+        ensure_fresh: When true and a refresh token plus ``CLIENT_ID_DEFAULT``
+            are available, refresh the access token if it is missing or
+            ``_should_refresh_access_token`` reports it as due.
+
+    Returns:
+        A 3-tuple in which every element that is not a non-empty string is
+        ``None``. ``(None, None, None)`` when no auth dict could be read. A
+        missing account id is derived from the id token.
+
+    Refreshed values are merged into a copy of the stored tokens and handed
+    to ``_persist_refreshed_auth``.
+    """
+    auth = read_auth_file()
+    if not isinstance(auth, dict):
+        return None, None, None
+
+    tokens = auth.get("tokens") if isinstance(auth.get("tokens"), dict) else {}
+    current: Dict[str, Any] = {
+        key: tokens.get(key) for key in ("access_token", "id_token", "refresh_token", "account_id")
+    }
+
+    def _text(value: Any) -> Optional[str]:
+        return value if isinstance(value, str) and value else None
+
+    if ensure_fresh and _text(current["refresh_token"]) and CLIENT_ID_DEFAULT:
+        stale = _should_refresh_access_token(current["access_token"], auth.get("last_refresh"))
+        if stale or not _text(current["access_token"]):
+            refreshed = _refresh_chatgpt_tokens(current["refresh_token"], CLIENT_ID_DEFAULT)
+            if refreshed:
+                updated_tokens = dict(tokens)
+                for key in current:
+                    current[key] = refreshed.get(key) or current[key]
+                    if _text(current[key]):
+                        updated_tokens[key] = current[key]
+                # Persisting is best-effort; the in-memory values are used either way.
+                _persist_refreshed_auth(auth, updated_tokens)
+
+    account_id = current["account_id"]
+    if not _text(account_id):
+        account_id = _derive_account_id(current["id_token"])
+    return _text(current["access_token"]), _text(account_id), _text(current["id_token"])
+
+
+def _should_refresh_access_token(access_token: Optional[str], last_refresh: Any) -> bool:
+    """Decide whether the access token should be refreshed now.
+
+    True when the token is missing or its ``exp`` claim lies within the next
+    five minutes. Without a usable ``exp``, True when ``last_refresh`` parses
+    and is at least 55 minutes old. Otherwise False.
+    """
+    if not (isinstance(access_token, str) and access_token):
+        return True
+    claims = parse_jwt_claims(access_token) or {}
+    exp = claims.get("exp") if isinstance(claims, dict) else None
+    now = datetime.datetime.now(datetime.timezone.utc)
+    if isinstance(exp, (int, float)):
+        try:
+            expiry = datetime.datetime.fromtimestamp(float(exp), datetime.timezone.utc)
+            return expiry <= now + datetime.timedelta(minutes=5)
+        except (OverflowError, OSError, ValueError):
+            pass  # timestamp out of range: fall through to last_refresh
+    refreshed_at = _parse_iso8601(last_refresh) if isinstance(last_refresh, str) else None
+    return refreshed_at is not None and refreshed_at <= now - datetime.timedelta(minutes=55)
+
+
+def _refresh_chatgpt_tokens(refresh_token: str, client_id: str) -> Optional[Dict[str, Optional[str]]]:
+    """Exchange ``refresh_token`` for fresh tokens at ``OAUTH_TOKEN_URL``.
+
+    Returns ``id_token``, ``access_token``, ``refresh_token`` (the old one
+    if the server sends none) and ``account_id`` (from the id token, may be
+    ``None``). Logs via ``eprint`` and returns ``None`` on transport errors,
+    HTTP status >= 400, invalid or non-object JSON, or missing tokens.
+    """
+    payload = {
+        "grant_type": "refresh_token",
+        "refresh_token": refresh_token,
+        "client_id": client_id,
+        "scope": "openid profile email offline_access",
+    }
+    try:
+        resp = requests.post(OAUTH_TOKEN_URL, json=payload, timeout=30)
+    except requests.RequestException as exc:
+        eprint(f"ERROR: failed to refresh ChatGPT token: {exc}")
+        return None
+    if resp.status_code >= 400:
+        eprint(f"ERROR: refresh token request returned status {resp.status_code}")
+        return None
+    try:
+        data = resp.json()
+    except ValueError as exc:
+        eprint(f"ERROR: unable to parse refresh token response: {exc}")
+        return None
+    # A JSON array/scalar body has no .get(); treat it like missing tokens
+    # instead of letting AttributeError escape to the caller.
+    id_token = data.get("id_token") if isinstance(data, dict) else None
+    access_token = data.get("access_token") if isinstance(data, dict) else None
+    if not isinstance(id_token, str) or not isinstance(access_token, str):
+        eprint("ERROR: refresh token response missing expected tokens")
+        return None
+    new_refresh_token = data.get("refresh_token")
+    if not isinstance(new_refresh_token, str) or not new_refresh_token:
+        new_refresh_token = refresh_token
+    tokens = {"id_token": id_token, "access_token": access_token, "refresh_token": new_refresh_token}
+    return {**tokens, "account_id": _derive_account_id(id_token)}
+
+
+def _persist_refreshed_auth(auth: Dict[str, Any], updated_tokens: Dict[str, Any]) -> Optional[Tuple[Dict[str, Any], Dict[str, Any]]]:
+    """Write a copy of ``auth`` carrying ``updated_tokens`` and a fresh ``last_refresh``.
+    Returns ``(updated_auth, updated_tokens)``, or ``None`` when the write fails."""
+    updated_auth = {**auth, "tokens": updated_tokens, "last_refresh": _now_iso8601()}
+    if not write_auth_file(updated_auth):
+        eprint("ERROR: unable to persist refreshed auth tokens")
+        return None
+    return updated_auth, updated_tokens
+
+
+def _derive_account_id(id_token: Optional[str]) -> Optional[str]:
+    """Return the ``chatgpt_account_id`` claim nested under the
+    ``https://api.openai.com/auth`` claim of ``id_token``, or ``None``."""
+    if not (isinstance(id_token, str) and id_token):
+        return None
+    claims = parse_jwt_claims(id_token) or {}
+    auth_claims = claims.get("https://api.openai.com/auth") if isinstance(claims, dict) else None
+    account_id = auth_claims.get("chatgpt_account_id") if isinstance(auth_claims, dict) else None
+    found = isinstance(account_id, str) and bool(account_id)
+    return account_id if found else None
+
+
+def _parse_iso8601(value: str) -> Optional[datetime.datetime]:
+    """Parse an ISO-8601 string (``Z`` suffix allowed) into an aware UTC datetime.
+    Naive values are tagged as UTC; anything unparsable gives ``None``."""
+    utc = datetime.timezone.utc
+    try:
+        text = value[:-1] + "+00:00" if value.endswith("Z") else value
+        parsed = datetime.datetime.fromisoformat(text)
+        return (parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=utc)).astimezone(utc)
+    except Exception:
+        return None
+
+
+def _now_iso8601() -> str:  # current UTC time via isoformat(), with "+00:00" rewritten as "Z"
+    return datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+def get_effective_chatgpt_auth() -> tuple[str | None, str | None]:
+    """Return ``(access_token, account_id)``, deriving a missing account id from the id token."""
+    tokens = load_chatgpt_tokens()
+    access_token, account_id = tokens[0], tokens[1] or _derive_account_id(tokens[2])
+    return access_token, account_id
+
+
+def sse_translate_chat(
+    upstream,
+    model: str,
+    created: int,
+    verbose: bool = False,
+    vlog=None,
+    reasoning_compat: str = "think-tags",
+    *,
+    include_usage: bool = False,
+):
+    """Translate an upstream Responses-style SSE stream into chat.completion chunks.
+
+    This is a generator.  It reads ``data: `` lines from
+    ``upstream.iter_lines()``, decodes each JSON event and yields UTF-8 encoded
+    SSE frames shaped like ``chat.completion.chunk`` objects.  ``upstream`` is
+    always closed when the generator finishes or is closed.
+
+    Args:
+        upstream: Streaming response exposing ``iter_lines(decode_unicode=...)``
+            and ``close()``.
+        model: Copied into the ``model`` field of every emitted chunk.
+        created: Copied into the ``created`` field of every emitted chunk.
+        verbose: When true (and ``vlog`` is set) every upstream line is logged.
+        vlog: Optional logging callable taking a single string.
+        reasoning_compat: How reasoning deltas are surfaced.  ``"think-tags"``
+            wraps them in ``<think>``/``</think>`` inside ``content``; ``"o3"``
+            emits ``delta.reasoning.content`` text parts; any other value emits
+            plain ``reasoning`` / ``reasoning_summary`` delta fields.
+        include_usage: When true, a usage chunk is emitted before ``[DONE]`` if
+            the ``response.completed`` event carried usage data.
+
+    Yields:
+        bytes: one SSE frame per item (``data: <payload>`` plus a blank line).
+    """
+    response_id = "chatcmpl-stream"
+    compat = (reasoning_compat or "think-tags").strip().lower()
+    think_open = False
+    think_closed = False
+    saw_any_summary = False
+    pending_summary_paragraph = False
+    upstream_usage = None
+    interrupted = False
+    ws_state: dict[str, Any] = {}
+    ws_index: dict[str, int] = {}
+    ws_next_index: int = 0
+
+    def _chunk(delta: Dict[str, Any], finish_reason: str | None = None, **extra: Any) -> bytes:
+        """Encode one chat.completion.chunk frame using the current response id."""
+        payload: Dict[str, Any] = {
+            "id": response_id,
+            "object": "chat.completion.chunk",
+            "created": created,
+            "model": model,
+            "choices": [{"index": 0, "delta": delta, "finish_reason": finish_reason}],
+        }
+        payload.update(extra)
+        return f"data: {json.dumps(payload)}\n\n".encode("utf-8")
+
+    def _tool_call_chunk(index: int, call_id: str, name: str, arguments: str) -> bytes:
+        """Encode a chunk carrying a single function-style tool call."""
+        return _chunk(
+            {
+                "tool_calls": [
+                    {
+                        "index": index,
+                        "id": call_id,
+                        "type": "function",
+                        "function": {"name": name, "arguments": arguments},
+                    }
+                ]
+            }
+        )
+
+    def _serialize_tool_args(eff_args: Any) -> str:
+        """Serialize tool call arguments to a JSON string.
+
+        Dicts and lists are dumped as-is.  A string is re-dumped when it parses
+        to a JSON dict/list and is otherwise wrapped as ``{"query": <string>}``.
+        Anything else becomes ``"{}"``.
+        """
+        if isinstance(eff_args, (dict, list)):
+            return json.dumps(eff_args)
+        if isinstance(eff_args, str):
+            try:
+                parsed = json.loads(eff_args)
+            except (json.JSONDecodeError, ValueError):
+                return json.dumps({"query": eff_args})
+            if isinstance(parsed, (dict, list)):
+                return json.dumps(parsed)
+            return json.dumps({"query": eff_args})
+        return "{}"
+
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        """Map ``evt["response"]["usage"]`` to prompt/completion/total token counts."""
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+
+    def _merge_search_params(dst: Dict[str, Any], src: Any) -> None:
+        """Fold web-search parameters found in ``src`` into ``dst``.
+
+        Whole-dict fields overwrite; the individual aliases (``q``, ``topn``,
+        ``include_domains``, ...) only fill keys that are still missing.
+        """
+        if not isinstance(src, dict):
+            return
+        for whole in ("parameters", "args", "arguments", "input"):
+            if isinstance(src.get(whole), dict):
+                dst.update(src.get(whole))
+        if isinstance(src.get("query"), str):
+            dst.setdefault("query", src.get("query"))
+        if isinstance(src.get("q"), str):
+            dst.setdefault("query", src.get("q"))
+        for rk in ("recency", "time_range", "days"):
+            if src.get(rk) is not None and rk not in dst:
+                dst[rk] = src.get(rk)
+        for dk in ("domains", "include_domains", "include"):
+            if isinstance(src.get(dk), list) and "domains" not in dst:
+                dst["domains"] = src.get(dk)
+        for mk in ("max_results", "topn", "limit"):
+            if src.get(mk) is not None and "max_results" not in dst:
+                dst["max_results"] = src.get(mk)
+
+    def _guarded_lines(lines):
+        """Yield upstream lines, ending quietly if the connection drops.
+
+        Connection errors surface while the iterator is being advanced, not
+        while a line is being processed, so they have to be caught here.
+        """
+        nonlocal interrupted
+        try:
+            yield from lines
+        except (
+            requests.exceptions.ChunkedEncodingError,
+            ConnectionError,
+            BrokenPipeError,
+        ) as e:
+            interrupted = True
+            if verbose and vlog:
+                vlog(f"Stream interrupted: {e}")
+
+    try:
+        try:
+            line_iterator = upstream.iter_lines(decode_unicode=False)
+        except requests.exceptions.ChunkedEncodingError as e:
+            if verbose and vlog:
+                vlog(f"Failed to start stream: {e}")
+            yield b"data: [DONE]\n\n"
+            return
+
+        for raw in _guarded_lines(line_iterator):
+            try:
+                if not raw:
+                    continue
+                line = (
+                    raw.decode("utf-8", errors="ignore")
+                    if isinstance(raw, (bytes, bytearray))
+                    else raw
+                )
+                if verbose and vlog:
+                    vlog(line)
+                if not line.startswith("data: "):
+                    continue
+                data = line[len("data: ") :].strip()
+                if not data:
+                    continue
+                if data == "[DONE]":
+                    break
+                try:
+                    evt = json.loads(data)
+                except (json.JSONDecodeError, UnicodeDecodeError):
+                    continue
+            except (
+                requests.exceptions.ChunkedEncodingError,
+                ConnectionError,
+                BrokenPipeError,
+            ) as e:
+                # Raised while handling a line (e.g. by the logger) - end gracefully
+                if verbose and vlog:
+                    vlog(f"Stream interrupted: {e}")
+                yield b"data: [DONE]\n\n"
+                return
+            if not isinstance(evt, dict):
+                # Valid JSON that is not an object carries no event to translate.
+                continue
+            kind = evt.get("type")
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+
+            if isinstance(kind, str) and ("web_search_call" in kind):
+                # Best effort: a malformed web-search event must not kill the stream.
+                try:
+                    call_id = evt.get("item_id") or "ws_call"
+                    if verbose and vlog:
+                        try:
+                            vlog(f"CM_TOOLS {kind} id={call_id} -> tool_calls(web_search)")
+                        except Exception:
+                            pass
+                    item = evt.get("item") if isinstance(evt.get("item"), dict) else {}
+                    params_dict = ws_state.setdefault(call_id, {}) if isinstance(ws_state.get(call_id), dict) else {}
+                    _merge_search_params(params_dict, item)
+                    _merge_search_params(params_dict, evt)
+                    if params_dict:
+                        ws_state.setdefault(call_id, {}).update(params_dict)
+                    args_str = _serialize_tool_args(ws_state.get(call_id, {}))
+                    if call_id not in ws_index:
+                        ws_index[call_id] = ws_next_index
+                        ws_next_index += 1
+                    yield _tool_call_chunk(ws_index.get(call_id, 0), call_id, "web_search", args_str)
+                    if kind.endswith(".completed") or kind.endswith(".done"):
+                        yield _chunk({}, "tool_calls")
+                except Exception:
+                    pass
+
+            if kind == "response.output_text.delta":
+                delta = evt.get("delta") or ""
+                if compat == "think-tags" and think_open and not think_closed:
+                    # First visible text after reasoning: close the think block.
+                    yield _chunk({"content": "</think>"})
+                    think_open = False
+                    think_closed = True
+                yield _chunk({"content": delta})
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and item.get("type") in ("function_call", "web_search_call"):
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ("web_search" if item.get("type") == "web_search_call" else "")
+                    raw_args = item.get("arguments") or item.get("parameters")
+                    if isinstance(raw_args, dict):
+                        try:
+                            ws_state.setdefault(call_id, {}).update(raw_args)
+                        except Exception:
+                            pass
+                    eff_args = ws_state.get(call_id, raw_args if isinstance(raw_args, (dict, list, str)) else {})
+                    try:
+                        args = _serialize_tool_args(eff_args)
+                    except Exception:
+                        args = "{}"
+                    if item.get("type") == "web_search_call" and verbose and vlog:
+                        try:
+                            vlog(f"CM_TOOLS response.output_item.done web_search_call id={call_id} has_args={bool(args)}")
+                        except Exception:
+                            pass
+                    if call_id not in ws_index:
+                        ws_index[call_id] = ws_next_index
+                        ws_next_index += 1
+                    _idx = ws_index.get(call_id, 0)
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        yield _tool_call_chunk(_idx, call_id, name, args)
+                        yield _chunk({}, "tool_calls")
+            elif kind == "response.reasoning_summary_part.added":
+                if compat in ("think-tags", "o3"):
+                    # Every summary part after the first is separated by a newline.
+                    if saw_any_summary:
+                        pending_summary_paragraph = True
+                    else:
+                        saw_any_summary = True
+            elif kind in ("response.reasoning_summary_text.delta", "response.reasoning_text.delta"):
+                delta_txt = evt.get("delta") or ""
+                is_summary = kind == "response.reasoning_summary_text.delta"
+                if compat == "o3":
+                    if is_summary and pending_summary_paragraph:
+                        yield _chunk({"reasoning": {"content": [{"type": "text", "text": "\n"}]}})
+                        pending_summary_paragraph = False
+                    yield _chunk({"reasoning": {"content": [{"type": "text", "text": delta_txt}]}})
+                elif compat == "think-tags":
+                    if not think_open and not think_closed:
+                        yield _chunk({"content": "<think>"})
+                        think_open = True
+                    if think_open and not think_closed:
+                        if is_summary and pending_summary_paragraph:
+                            yield _chunk({"content": "\n"})
+                            pending_summary_paragraph = False
+                        yield _chunk({"content": delta_txt})
+                elif is_summary:
+                    yield _chunk({"reasoning_summary": delta_txt, "reasoning": delta_txt})
+                else:
+                    yield _chunk({"reasoning": delta_txt})
+            elif kind == "response.failed":
+                failed = evt.get("response") if isinstance(evt.get("response"), dict) else {}
+                error = failed.get("error") if isinstance(failed.get("error"), dict) else {}
+                err = error.get("message", "response.failed")
+                yield f"data: {json.dumps({'error': {'message': err}})}\n\n".encode("utf-8")
+            elif kind == "response.completed":
+                usage = _extract_usage(evt)
+                if usage:
+                    upstream_usage = usage
+                if compat == "think-tags" and think_open and not think_closed:
+                    yield _chunk({"content": "</think>"})
+                    think_open = False
+                    think_closed = True
+                # All other "*.done" events (including response.output_text.done)
+                # are ignored, so the stop chunk is emitted exactly once, here.
+                yield _chunk({}, "stop")
+                if include_usage and upstream_usage:
+                    try:
+                        yield _chunk({}, None, usage=upstream_usage)
+                    except Exception:
+                        pass
+                yield b"data: [DONE]\n\n"
+                break
+
+        if interrupted:
+            # Connection dropped mid-stream: terminate the client stream cleanly.
+            yield b"data: [DONE]\n\n"
+    finally:
+        upstream.close()
+
+
+def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None, *, include_usage: bool = False):
+    """Translate an upstream Responses-style SSE stream into text_completion chunks.
+
+    This is a generator.  It reads ``data: `` lines from
+    ``upstream.iter_lines()`` and yields UTF-8 encoded SSE frames shaped like
+    ``text_completion.chunk`` objects.  ``upstream`` is always closed on exit.
+
+    Args:
+        upstream: Streaming response exposing ``iter_lines(decode_unicode=...)``
+            and ``close()``.
+        model: Copied into the ``model`` field of every emitted chunk.
+        created: Copied into the ``created`` field of every emitted chunk.
+        verbose: When true (and ``vlog`` is set) every upstream line is logged.
+        vlog: Optional logging callable taking a single string.
+        include_usage: When true, a usage chunk is emitted before ``[DONE]`` if
+            the ``response.completed`` event carried usage data.
+
+    Yields:
+        bytes: one SSE frame per item (``data: <payload>`` plus a blank line).
+    """
+    response_id = "cmpl-stream"
+    upstream_usage = None
+
+    def _chunk(text: str, finish_reason: str | None = None, **extra: Any) -> bytes:
+        """Encode one text_completion.chunk frame using the current response id."""
+        payload: Dict[str, Any] = {
+            "id": response_id,
+            "object": "text_completion.chunk",
+            "created": created,
+            "model": model,
+            "choices": [{"index": 0, "text": text, "finish_reason": finish_reason}],
+        }
+        payload.update(extra)
+        return f"data: {json.dumps(payload)}\n\n".encode("utf-8")
+
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        """Map ``evt["response"]["usage"]`` to prompt/completion/total token counts."""
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+
+    try:
+        for raw_line in upstream.iter_lines(decode_unicode=False):
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+            if verbose and vlog:
+                vlog(line)
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                # An upstream [DONE] marker is translated into a stop chunk.
+                yield _chunk("", "stop")
+                continue
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            if not isinstance(evt, dict):
+                # Valid JSON that is not an object carries no event to translate.
+                continue
+            kind = evt.get("type")
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            if kind == "response.output_text.delta":
+                yield _chunk(evt.get("delta") or "")
+            elif kind == "response.output_text.done":
+                yield _chunk("", "stop")
+            elif kind == "response.completed":
+                usage = _extract_usage(evt)
+                if usage:
+                    upstream_usage = usage
+                if include_usage and upstream_usage:
+                    try:
+                        yield _chunk("", None, usage=upstream_usage)
+                    except Exception:
+                        pass
+                yield b"data: [DONE]\n\n"
+                break
+    finally:
+        upstream.close()
diff --git a/build/lib/chatmock/version.py b/build/lib/chatmock/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..17d6d1aaa10aae5181838a3cfa812d120d51b45e
--- /dev/null
+++ b/build/lib/chatmock/version.py
@@ -0,0 +1,4 @@
+from __future__ import annotations
+
+
+# Version string exposed by this package.
+__version__ = "1.37"
diff --git a/build/lib/chatmock/websocket_routes.py b/build/lib/chatmock/websocket_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..37fcfe034d79667c4e7114d7380914fadc0f740c
--- /dev/null
+++ b/build/lib/chatmock/websocket_routes.py
@@ -0,0 +1,225 @@
+from __future__ import annotations
+
+import json
+import os
+import ssl
+from typing import Any, Dict
+
+import certifi
+from flask import current_app, request
+from flask_sock import Sock
+from websockets.sync.client import connect as websocket_connect
+from websockets.exceptions import ConnectionClosed
+
+from .responses_api import (
+    ResponsesRequestError,
+    extract_client_session_id,
+    normalize_responses_payload,
+)
+from .session import (
+    clear_responses_reuse_state,
+    note_responses_stream_event,
+    prepare_responses_request_for_session,
+)
+from .upstream import build_upstream_headers, build_upstream_websocket_url
+from .utils import get_effective_chatgpt_auth
+
+
+def _log_json(prefix: str, payload: Any) -> None:
+    """Print ``prefix`` followed by ``payload`` rendered as indented JSON.
+
+    Best effort only: if the JSON rendering or the print fails, the payload is
+    printed via its plain string form instead, and if that fails too the
+    error is swallowed.
+    """
+    try:
+        rendered = json.dumps(payload, indent=2, ensure_ascii=False)
+        print(f"{prefix}\n{rendered}")
+        return
+    except Exception:
+        pass
+    try:
+        print(f"{prefix}\n{payload}")
+    except Exception:
+        pass
+
+
+def _error_event(message: str, *, status_code: int = 400, code: str | None = None) -> Dict[str, Any]:
+    """Build an ``error`` event dict; ``code`` is included only when truthy."""
+    details: Dict[str, Any] = {"message": message, "code": code} if code else {"message": message}
+    return {"type": "error", "status_code": status_code, "error": details}
+
+
+def _is_terminal_event(event: Any) -> bool:
+    """Tell whether ``event`` is a dict whose ``type`` marks the end of a response."""
+    terminal_kinds = ("response.completed", "response.failed", "error")
+    return isinstance(event, dict) and event.get("type") in terminal_kinds
+
+
+def _build_websocket_ssl_context() -> ssl.SSLContext:
+    """Create the default SSL context used for the upstream websocket.
+
+    The CA bundle path is taken from ``CODEX_CA_CERTIFICATE``, then
+    ``SSL_CERT_FILE``, and finally ``certifi.where()`` when neither
+    environment variable holds a non-empty value.
+    """
+    for env_name in ("CODEX_CA_CERTIFICATE", "SSL_CERT_FILE"):
+        configured = os.getenv(env_name)
+        if configured:
+            return ssl.create_default_context(cafile=configured)
+    return ssl.create_default_context(cafile=certifi.where())
+
+
+def connect_upstream_websocket(url: str, headers: Dict[str, str]):
+    """Open a synchronous websocket to ``url`` and return the connection.
+
+    ``headers`` are sent as additional handshake headers, the open timeout is
+    15 seconds, and TLS uses the context from ``_build_websocket_ssl_context``.
+    """
+    tls_context = _build_websocket_ssl_context()
+    return websocket_connect(url, additional_headers=headers, open_timeout=15, ssl=tls_context)
+
+
+def register_websocket_routes(sock: Sock) -> None:
+    """Register the ``/v1/responses`` websocket proxy route on ``sock``.
+
+    The handler relays frames between the client and an upstream websocket:
+
+    * every client frame must be a JSON object, and the first one must be a
+      ``response.create`` request;
+    * ``response.create`` payloads are passed through
+      ``normalize_responses_payload`` and
+      ``prepare_responses_request_for_session`` before being forwarded, while
+      other frames are forwarded unchanged;
+    * the upstream connection is opened on first use and reopened when the
+      session id of a request differs from the one it was opened for;
+    * upstream messages are relayed to the client until a terminal event
+      (``response.completed``, ``response.failed`` or ``error``) arrives.
+
+    Failures are reported to the client as ``error`` events built by
+    ``_error_event``.  The upstream connection is always closed on exit.
+    """
+
+    @sock.route("/v1/responses")
+    def responses_websocket(ws) -> None:
+        verbose = bool(current_app.config.get("VERBOSE"))
+        upstream_ws = None
+        upstream_session_id: str | None = None
+        active_session_id: str | None = None
+
+        def _send_error(message: str, *, status_code: int = 400, code: str | None = None) -> None:
+            """Send an error event to the client, ignoring delivery failures."""
+            evt = _error_event(message, status_code=status_code, code=code)
+            if verbose:
+                _log_json("STREAM OUT WS /v1/responses (error)", evt)
+            try:
+                ws.send(json.dumps(evt))
+            except Exception:
+                pass
+
+        def _report_upstream_closed() -> None:
+            """Drop reuse state for the active session and tell the client."""
+            if active_session_id:
+                clear_responses_reuse_state(active_session_id)
+            _send_error("Upstream websocket closed unexpectedly.", status_code=502)
+
+        try:
+            while True:
+                incoming = ws.receive()
+                if incoming is None:
+                    break
+
+                if isinstance(incoming, bytes):
+                    incoming_text = incoming.decode("utf-8", errors="ignore")
+                else:
+                    incoming_text = str(incoming)
+                if verbose:
+                    print("IN WS /v1/responses\n" + incoming_text)
+
+                try:
+                    payload = json.loads(incoming_text)
+                except Exception:
+                    _send_error("Websocket frames must be valid JSON objects.", status_code=400)
+                    break
+
+                if not isinstance(payload, dict):
+                    _send_error("Websocket frames must be JSON objects.", status_code=400)
+                    break
+
+                client_session_id = extract_client_session_id(request.headers)
+                outbound_text = incoming_text
+                session_id = upstream_session_id
+
+                if payload.get("type") == "response.create":
+                    try:
+                        normalized = normalize_responses_payload(
+                            payload,
+                            config=current_app.config,
+                            client_session_id=client_session_id,
+                        )
+                    except ResponsesRequestError as exc:
+                        # A rejected request keeps the client connection open.
+                        _send_error(str(exc), status_code=exc.status_code, code=exc.code)
+                        continue
+
+                    if normalized.service_tier_resolution.warning_message and verbose:
+                        print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+                    prepared = prepare_responses_request_for_session(
+                        normalized.session_id,
+                        normalized.payload,
+                        allow_previous_response_id=True,
+                    )
+                    outbound_text = json.dumps(prepared.payload)
+                    session_id = normalized.session_id
+                    active_session_id = normalized.session_id
+                    if verbose:
+                        _log_json("OUTBOUND >> ChatGPT Responses WS payload", prepared.payload)
+                elif upstream_ws is None:
+                    _send_error(
+                        "The first websocket message must be a response.create request.",
+                        status_code=400,
+                    )
+                    break
+
+                # (Re)connect when there is no upstream yet or the session changed.
+                if upstream_ws is None or (session_id and session_id != upstream_session_id):
+                    access_token, account_id = get_effective_chatgpt_auth()
+                    if not access_token or not account_id:
+                        if session_id:
+                            clear_responses_reuse_state(session_id)
+                        _send_error(
+                            "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+                            status_code=401,
+                        )
+                        break
+
+                    if upstream_ws is not None:
+                        try:
+                            upstream_ws.close()
+                        except Exception:
+                            pass
+
+                    effective_session_id = session_id or client_session_id or ""
+                    try:
+                        upstream_ws = connect_upstream_websocket(
+                            build_upstream_websocket_url(),
+                            build_upstream_headers(
+                                access_token,
+                                account_id,
+                                effective_session_id,
+                                accept="application/json",
+                            ),
+                        )
+                    except Exception as exc:
+                        if session_id:
+                            clear_responses_reuse_state(session_id)
+                        _send_error(
+                            f"Upstream websocket connection failed: {exc}",
+                            status_code=502,
+                        )
+                        break
+                    upstream_session_id = effective_session_id
+
+                # A reused upstream connection may have been closed since the
+                # previous request; report that the same way as a failed recv.
+                try:
+                    upstream_ws.send(outbound_text)
+                except ConnectionClosed:
+                    _report_upstream_closed()
+                    return
+
+                while True:
+                    try:
+                        upstream_message = upstream_ws.recv()
+                    except ConnectionClosed:
+                        upstream_message = None
+                    if upstream_message is None:
+                        _report_upstream_closed()
+                        return
+                    if verbose:
+                        try:
+                            print("STREAM OUT WS /v1/responses\n" + str(upstream_message))
+                        except Exception:
+                            pass
+                    ws.send(upstream_message)
+
+                    try:
+                        parsed = json.loads(upstream_message)
+                    except Exception:
+                        parsed = None
+                    if isinstance(parsed, dict) and active_session_id:
+                        note_responses_stream_event(active_session_id, parsed)
+                    if _is_terminal_event(parsed):
+                        if isinstance(parsed, dict) and parsed.get("type") in ("response.failed", "error"):
+                            # After a failure the upstream connection is dropped
+                            # so that the next request opens a fresh one.
+                            if upstream_ws is not None:
+                                try:
+                                    upstream_ws.close()
+                                except Exception:
+                                    pass
+                            upstream_ws = None
+                            upstream_session_id = None
+                        break
+        finally:
+            if upstream_ws is not None:
+                try:
+                    upstream_ws.close()
+                except Exception:
+                    pass
diff --git a/chatmock.egg-info/PKG-INFO b/chatmock.egg-info/PKG-INFO
new file mode 100644
index 0000000000000000000000000000000000000000..817e23c4025bdc6659908ff01b94cef4ca799cff
--- /dev/null
+++ b/chatmock.egg-info/PKG-INFO
@@ -0,0 +1,200 @@
+Metadata-Version: 2.4
+Name: chatmock
+Version: 1.37
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: blinker==1.9.0
+Requires-Dist: certifi==2025.8.3
+Requires-Dist: flask==3.1.1
+Requires-Dist: flask-sock==0.7.0
+Requires-Dist: idna==3.10
+Requires-Dist: itsdangerous==2.2.0
+Requires-Dist: jinja2==3.1.6
+Requires-Dist: markupsafe==3.0.2
+Requires-Dist: requests==2.32.5
+Requires-Dist: urllib3==2.5.0
+Requires-Dist: websockets==15.0.1
+Requires-Dist: werkzeug==3.1.3
+Provides-Extra: gui
+Requires-Dist: Pillow==11.3.0; extra == "gui"
+Requires-Dist: PyInstaller==6.16.0; extra == "gui"
+Requires-Dist: PySide6==6.9.2; extra == "gui"
+Dynamic: license-file
+
+<div align="center">
+
+# ChatMock
+
+**Allows Codex to work in your favourite chat apps and coding tools.**
+
+[![PyPI](https://img.shields.io/pypi/v/chatmock?color=blue&label=pypi)](https://pypi.org/project/chatmock/)
+[![Python](https://img.shields.io/pypi/pyversions/chatmock)](https://pypi.org/project/chatmock/)
+[![License](https://img.shields.io/github/license/RayBytes/ChatMock)](LICENSE)
+[![Stars](https://img.shields.io/github/stars/RayBytes/ChatMock?style=flat)](https://github.com/RayBytes/ChatMock/stargazers)
+[![Last Commit](https://img.shields.io/github/last-commit/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/commits/main)
+[![Issues](https://img.shields.io/github/issues/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/issues)
+
+<br>
+
+
+</div>
+
+<br>
+
+## Install
+
+#### Homebrew
+```bash
+brew tap RayBytes/chatmock
+brew install chatmock
+```
+
+#### pipx / pip
+```bash
+pipx install chatmock
+```
+
+#### GUI
+Download from [releases](https://github.com/RayBytes/ChatMock/releases) (macOS & Windows)
+
+#### Docker
+See [DOCKER.md](DOCKER.md)
+
+<br>
+
+## Getting Started
+
+```bash
+# 1. Sign in with your ChatGPT account
+chatmock login
+
+# 2. Start the server
+chatmock serve
+```
+
+The server runs at `http://127.0.0.1:8000` by default. Use `http://127.0.0.1:8000/v1` as your base URL for OpenAI-compatible apps.
+
+<br>
+
+## Usage
+
+<details open>
+<summary><b>Python</b></summary>
+
+```python
+from openai import OpenAI
+
+client = OpenAI(
+    base_url="http://127.0.0.1:8000/v1",
+    api_key="anything"  # not checked
+)
+
+response = client.chat.completions.create(
+    model="gpt-5.4",
+    messages=[{"role": "user", "content": "hello"}]
+)
+print(response.choices[0].message.content)
+```
+
+</details>
+
+<details>
+<summary><b>cURL</b></summary>
+
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5.4",
+    "messages": [{"role": "user", "content": "hello"}]
+  }'
+```
+
+</details>
+
+<br>
+
+## Supported Models
+
+- `gpt-5.4`
+- `gpt-5.4-mini`
+- `gpt-5.2`
+- `gpt-5.1`
+- `gpt-5`
+- `gpt-5.3-codex`
+- `gpt-5.3-codex-spark`
+- `gpt-5.2-codex`
+- `gpt-5-codex`
+- `gpt-5.1-codex`
+- `gpt-5.1-codex-max`
+- `gpt-5.1-codex-mini`
+- `codex-mini`
+
+<br>
+
+## Features
+
+- Tool / function calling
+- Vision / image input
+- Thinking summaries (via think tags)
+- Configurable thinking effort
+- Fast mode for supported models
+- Web search tool
+- OpenAI-compatible `/v1/responses` (HTTP + WebSocket)
+- Ollama-compatible endpoints
+- Reasoning effort exposed as separate models (optional)
+
+<br>
+
+## Configuration
+
+Pass these flags after `chatmock serve`, or set the matching environment variable instead.
+
+| Flag | Env var | Options | Default | Description |
+|------|---------|---------|---------|-------------|
+| `--reasoning-effort` | `CHATGPT_LOCAL_REASONING_EFFORT` | none, minimal, low, medium, high, xhigh | medium | How hard the model thinks |
+| `--reasoning-summary` | `CHATGPT_LOCAL_REASONING_SUMMARY` | auto, concise, detailed, none | auto | Thinking summary verbosity |
+| `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags, current (alias for legacy) | think-tags | How reasoning is returned to the client |
+| `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
+| `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
+| `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
+
+<details>
+<summary><b>Web search in a request</b></summary>
+
+```json
+{
+  "model": "gpt-5.4",
+  "messages": [{"role": "user", "content": "latest news on ..."}],
+  "responses_tools": [{"type": "web_search"}],
+  "responses_tool_choice": "auto"
+}
+```
+
+</details>
+
+<details>
+<summary><b>Fast mode in a request</b></summary>
+
+```json
+{
+  "model": "gpt-5.4",
+  "input": "summarize this",
+  "fast_mode": true
+}
+```
+
+</details>
+
+<br>
+
+## Notes
+
+Use responsibly and at your own risk. This project is not affiliated with OpenAI.
+
+<br>
+
+## Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=RayBytes/ChatMock&type=Timeline)](https://www.star-history.com/#RayBytes/ChatMock&Timeline)
diff --git a/chatmock.egg-info/SOURCES.txt b/chatmock.egg-info/SOURCES.txt
new file mode 100644
index 0000000000000000000000000000000000000000..9b04f9155cbafdbc2cc076e2a32a668d6f17f899
--- /dev/null
+++ b/chatmock.egg-info/SOURCES.txt
@@ -0,0 +1,34 @@
+LICENSE
+README.md
+pyproject.toml
+chatmock/__init__.py
+chatmock/app.py
+chatmock/cli.py
+chatmock/config.py
+chatmock/fast_mode.py
+chatmock/http.py
+chatmock/limits.py
+chatmock/model_registry.py
+chatmock/models.py
+chatmock/oauth.py
+chatmock/prompt.md
+chatmock/prompt_gpt5_codex.md
+chatmock/reasoning.py
+chatmock/responses_api.py
+chatmock/routes_ollama.py
+chatmock/routes_openai.py
+chatmock/session.py
+chatmock/transform.py
+chatmock/upstream.py
+chatmock/utils.py
+chatmock/version.py
+chatmock/websocket_routes.py
+chatmock.egg-info/PKG-INFO
+chatmock.egg-info/SOURCES.txt
+chatmock.egg-info/dependency_links.txt
+chatmock.egg-info/entry_points.txt
+chatmock.egg-info/requires.txt
+chatmock.egg-info/top_level.txt
+tests/test_fast_mode.py
+tests/test_models.py
+tests/test_routes.py
\ No newline at end of file
diff --git a/chatmock.egg-info/dependency_links.txt b/chatmock.egg-info/dependency_links.txt
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/chatmock.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/chatmock.egg-info/entry_points.txt b/chatmock.egg-info/entry_points.txt
new file mode 100644
index 0000000000000000000000000000000000000000..fce63558a8cd084eb815b63b68bb8f54221d8306
--- /dev/null
+++ b/chatmock.egg-info/entry_points.txt
@@ -0,0 +1,2 @@
+[console_scripts]
+chatmock = chatmock.cli:main
diff --git a/chatmock.egg-info/requires.txt b/chatmock.egg-info/requires.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f147691c4b5581e65a1b121d643b0d7b743a3305
--- /dev/null
+++ b/chatmock.egg-info/requires.txt
@@ -0,0 +1,17 @@
+blinker==1.9.0
+certifi==2025.8.3
+flask==3.1.1
+flask-sock==0.7.0
+idna==3.10
+itsdangerous==2.2.0
+jinja2==3.1.6
+markupsafe==3.0.2
+requests==2.32.5
+urllib3==2.5.0
+websockets==15.0.1
+werkzeug==3.1.3
+
+[gui]
+Pillow==11.3.0
+PyInstaller==6.16.0
+PySide6==6.9.2
diff --git a/chatmock.egg-info/top_level.txt b/chatmock.egg-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d52d6a9af19735f3891496f25be24a1f0433cf0f
--- /dev/null
+++ b/chatmock.egg-info/top_level.txt
@@ -0,0 +1 @@
+chatmock
diff --git a/chatmock.py b/chatmock.py
new file mode 100644
index 0000000000000000000000000000000000000000..89a96d211d56fe9550b743833944636df3086bd6
--- /dev/null
+++ b/chatmock.py
@@ -0,0 +1,7 @@
+from __future__ import annotations
+
+from chatmock.cli import main
+
+if __name__ == "__main__":  # executed as a script, e.g. `python3 chatmock.py login`
+    main()
+
diff --git a/chatmock/__init__.py b/chatmock/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ae049287c6b4e8e09b809d4c5cda847c9f64cc2
--- /dev/null
+++ b/chatmock/__init__.py
@@ -0,0 +1,5 @@
+from __future__ import annotations
+
+from .app import create_app
+from .cli import main
+from .version import __version__
diff --git a/chatmock/__pycache__/__init__.cpython-314.pyc b/chatmock/__pycache__/__init__.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f541e3b32c2ab6914dc90dec8755786a21e20a7c
Binary files /dev/null and b/chatmock/__pycache__/__init__.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/app.cpython-314.pyc b/chatmock/__pycache__/app.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fc8cdef7b04f2ff930cc4a05f7787ce71b15fd97
Binary files /dev/null and b/chatmock/__pycache__/app.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/cli.cpython-314.pyc b/chatmock/__pycache__/cli.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a83b4fa4d7ef96d9b618389a69bbbfd0d68f27d7
Binary files /dev/null and b/chatmock/__pycache__/cli.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/config.cpython-314.pyc b/chatmock/__pycache__/config.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..29d7260eb768dd763aa68e784ad92f400b7b7b74
Binary files /dev/null and b/chatmock/__pycache__/config.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/fast_mode.cpython-314.pyc b/chatmock/__pycache__/fast_mode.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a5abc1f4382bdbfbd7f8094a4cd133a666b02758
Binary files /dev/null and b/chatmock/__pycache__/fast_mode.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/http.cpython-314.pyc b/chatmock/__pycache__/http.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3dd24c29832ff1000d3cdc2ad6231ed9a6847436
Binary files /dev/null and b/chatmock/__pycache__/http.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/limits.cpython-314.pyc b/chatmock/__pycache__/limits.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3b5298084ada6f3e929b7afc0c900a4541b8ede
Binary files /dev/null and b/chatmock/__pycache__/limits.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/model_registry.cpython-314.pyc b/chatmock/__pycache__/model_registry.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..258512c42c31e5cd7f0c2140587ba1034c3821f3
Binary files /dev/null and b/chatmock/__pycache__/model_registry.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/models.cpython-314.pyc b/chatmock/__pycache__/models.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b7b7a4ea704d870ddf3af5261c1aad615119194
Binary files /dev/null and b/chatmock/__pycache__/models.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/oauth.cpython-314.pyc b/chatmock/__pycache__/oauth.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f069fe20370b5d3aa9d15afffe7d837949fd201f
Binary files /dev/null and b/chatmock/__pycache__/oauth.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/reasoning.cpython-314.pyc b/chatmock/__pycache__/reasoning.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0644ef7ff45404078c149ac6b2bc8802292a5bf2
Binary files /dev/null and b/chatmock/__pycache__/reasoning.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/responses_api.cpython-314.pyc b/chatmock/__pycache__/responses_api.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6f813ff2bbd091c7534981d1a23cca4c81ce53cf
Binary files /dev/null and b/chatmock/__pycache__/responses_api.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/routes_custom.cpython-314.pyc b/chatmock/__pycache__/routes_custom.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ade3d87dfeb50d07e2244a99f7709336d80b8783
Binary files /dev/null and b/chatmock/__pycache__/routes_custom.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/routes_ollama.cpython-314.pyc b/chatmock/__pycache__/routes_ollama.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ee3a17c0d3550ad508b0ac54cf603f05009a7391
Binary files /dev/null and b/chatmock/__pycache__/routes_ollama.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/routes_openai.cpython-314.pyc b/chatmock/__pycache__/routes_openai.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9c632fb53d7a201e0d51b4a4a636fe8e951749f0
Binary files /dev/null and b/chatmock/__pycache__/routes_openai.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/session.cpython-314.pyc b/chatmock/__pycache__/session.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..33094fa9c6adba61169664005e216e9dfe06830c
Binary files /dev/null and b/chatmock/__pycache__/session.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/transform.cpython-314.pyc b/chatmock/__pycache__/transform.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..71024ca35547f37c819d641ddf744eb33094c867
Binary files /dev/null and b/chatmock/__pycache__/transform.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/upstream.cpython-314.pyc b/chatmock/__pycache__/upstream.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..549ec1dd015593c912497aab27940a13f34f59fb
Binary files /dev/null and b/chatmock/__pycache__/upstream.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/utils.cpython-314.pyc b/chatmock/__pycache__/utils.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be9ccab25c95ee0235e0a1f88231d144e9694b40
Binary files /dev/null and b/chatmock/__pycache__/utils.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/version.cpython-314.pyc b/chatmock/__pycache__/version.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3290304b76fafdb07a94d0575a398599639b25c5
Binary files /dev/null and b/chatmock/__pycache__/version.cpython-314.pyc differ
diff --git a/chatmock/__pycache__/websocket_routes.cpython-314.pyc b/chatmock/__pycache__/websocket_routes.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0540e73e3dc55ed25b761c3a1154c78ffe3645e4
Binary files /dev/null and b/chatmock/__pycache__/websocket_routes.cpython-314.pyc differ
diff --git a/chatmock/app.py b/chatmock/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecf7e0a0098de3bacdbd149b095317d3fb651182
--- /dev/null
+++ b/chatmock/app.py
@@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from flask import Flask, jsonify
+from flask_sock import Sock
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .http import build_cors_headers
+from .routes_openai import openai_bp
+from .routes_ollama import ollama_bp
+from .routes_custom import custom_bp
+from .websocket_routes import register_websocket_routes
+
+
+def create_app(  # Flask application factory: stores settings in app.config and wires up every route.
+    verbose: bool = False,
+    verbose_obfuscation: bool = False,
+    reasoning_effort: str = "medium",
+    reasoning_summary: str = "auto",
+    reasoning_compat: str = "think-tags",
+    fast_mode: bool = False,
+    debug_model: str | None = None,
+    expose_reasoning_models: bool = False,
+    default_web_search: bool = False,
+) -> Flask:
+    app = Flask(__name__)
+
+    app.config.update(  # flags are coerced with bool(); the remaining values are stored as passed
+        VERBOSE=bool(verbose),
+        VERBOSE_OBFUSCATION=bool(verbose_obfuscation),
+        REASONING_EFFORT=reasoning_effort,
+        REASONING_SUMMARY=reasoning_summary,
+        REASONING_COMPAT=reasoning_compat,
+        FAST_MODE=bool(fast_mode),
+        DEBUG_MODEL=debug_model,
+        BASE_INSTRUCTIONS=BASE_INSTRUCTIONS,  # prompt text imported from .config
+        GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS,
+        EXPOSE_REASONING_MODELS=bool(expose_reasoning_models),
+        DEFAULT_WEB_SEARCH=bool(default_web_search),
+    )
+
+    @app.get("/")  # "/" and "/health" share this handler
+    @app.get("/health")
+    def health():
+        return jsonify({"status": "ok"})
+
+    @app.after_request
+    def _cors(resp):
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)  # setdefault: a header already on the response is left untouched
+        return resp
+
+    app.register_blueprint(openai_bp)
+    app.register_blueprint(ollama_bp)
+    app.register_blueprint(custom_bp)
+    sock = Sock(app)  # flask-sock object that the WebSocket routes are registered on next
+    register_websocket_routes(sock)
+
+    return app
diff --git a/chatmock/cli.py b/chatmock/cli.py
new file mode 100644
index 0000000000000000000000000000000000000000..8482cf38b201f61b2c1adf55107c3906139bc20e
--- /dev/null
+++ b/chatmock/cli.py
@@ -0,0 +1,425 @@
+from __future__ import annotations
+
+import errno
+import argparse
+import json
+import os
+import sys
+import webbrowser
+from datetime import datetime
+
+from .app import create_app
+from .config import CLIENT_ID_DEFAULT
+from .limits import RateLimitWindow, compute_reset_at, load_rate_limit_snapshot
+from .oauth import OAuthHTTPServer, OAuthHandler, REQUIRED_PORT, URL_BASE
+from .utils import eprint, get_home_dir, load_chatgpt_tokens, parse_jwt_claims, read_auth_file
+
+
+_STATUS_LIMIT_BAR_SEGMENTS = 30
+_STATUS_LIMIT_BAR_FILLED = "█"
+_STATUS_LIMIT_BAR_EMPTY = "░"
+_STATUS_LIMIT_BAR_PARTIAL = "▓"
+
+
+def _clamp_percent(value: float) -> float:
+    """Coerce ``value`` to a float confined to the range 0.0-100.0.
+
+    Anything ``float()`` rejects, and NaN, is reported as 0.0.
+    """
+    try:
+        number = float(value)
+    except Exception:
+        return 0.0
+    if number != number or number < 0.0:  # NaN is the only float unequal to itself
+        return 0.0
+    return 100.0 if number > 100.0 else number
+
+
+def _render_progress_bar(percent_used: float) -> str:
+    """Draw a bracketed, fixed-width usage bar for ``percent_used``.
+
+    The bar is ``_STATUS_LIMIT_BAR_SEGMENTS`` cells wide. A leftover fraction
+    greater than half a cell is drawn as one partial glyph after the filled
+    cells; smaller fractions are dropped.
+    """
+    width = _STATUS_LIMIT_BAR_SEGMENTS
+    exact_cells = max(0.0, min(1.0, percent_used / 100.0)) * width
+    whole_cells = int(exact_cells)
+    show_partial = (exact_cells - whole_cells) > 0.5
+
+    # The partial glyph occupies one of the used cells, so it is counted here
+    # and subtracted again from the solid run below.
+    used = max(0, min(width, whole_cells + (1 if show_partial else 0)))
+    solid = used - 1 if show_partial and used > 0 else used
+    partial_glyph = _STATUS_LIMIT_BAR_PARTIAL if show_partial and used > 0 else ""
+    bar = _STATUS_LIMIT_BAR_FILLED * solid + partial_glyph + _STATUS_LIMIT_BAR_EMPTY * (width - used)
+    return f"[{bar}]"
+
+
+def _get_usage_color(percent_used: float) -> str:
+    """Pick the ANSI colour escape for a usage level (thresholds at 90, 75 and 50)."""
+    # Checked from the highest threshold down; the first floor reached wins.
+    bands = (
+        (90, "\033[91m"),
+        (75, "\033[93m"),
+        (50, "\033[94m"),
+    )
+    return next((code for floor, code in bands if percent_used >= floor), "\033[92m")
+
+
+def _reset_color() -> str:
+    """Return the ANSI reset sequence printed after coloured text to end the colouring."""
+    return "\033[0m"
+
+
+def _format_window_duration(minutes: int | None) -> str | None:
+    """Spell out a window length given in minutes, e.g. ``"1 week 2 days"``.
+
+    Returns ``None`` when ``minutes`` is ``None``, cannot be converted with
+    ``int()``, or is zero or negative.
+    """
+    if minutes is None:
+        return None
+    try:
+        total = int(minutes)
+    except Exception:
+        return None
+    if total <= 0:
+        return None
+
+    # Peel off the largest unit first; whatever remains feeds the next one.
+    weeks, rest = divmod(total, 7 * 24 * 60)
+    days, rest = divmod(rest, 24 * 60)
+    hours, rest = divmod(rest, 60)
+    pieces = [
+        f"{count} {unit}" + ("s" if count != 1 else "")
+        for count, unit in ((weeks, "week"), (days, "day"), (hours, "hour"), (rest, "minute"))
+        if count
+    ]
+    return " ".join(pieces or [f"{total} minute" + ("s" if total != 1 else "")])
+
+
+def _format_reset_duration(seconds: int | None) -> str | None:
+    """Render a countdown in seconds compactly, e.g. ``"1d 2h 5m"``.
+
+    Negative inputs count as zero. Durations shorter than a minute give
+    ``"under 1m"`` (or ``"0m"`` for exactly zero). ``None`` and values that
+    ``int()`` rejects yield ``None``.
+    """
+    if seconds is None:
+        return None
+    try:
+        left = max(0, int(seconds))
+    except Exception:
+        return None
+    days, left = divmod(left, 86400)
+    hours, left = divmod(left, 3600)
+    minutes, spare_seconds = divmod(left, 60)
+    pieces: list[str] = [
+        f"{amount}{suffix}"
+        for amount, suffix in ((days, "d"), (hours, "h"), (minutes, "m"))
+        if amount
+    ]
+    if not pieces:
+        pieces.append("under 1m" if spare_seconds else "0m")
+    return " ".join(pieces)
+
+
+def _format_local_datetime(dt: datetime) -> str:
+    """Show ``dt`` in the system's local zone as ``Mon DD, YYYY HH:MM TZ``."""
+    localized = dt.astimezone()
+    return f"{localized.strftime('%b %d, %Y %H:%M')} {localized.tzname() or 'local'}"
+
+
+def _print_usage_limits_block() -> None:  # Print the stored rate-limit snapshot as coloured bars with reset times.
+    stored = load_rate_limit_snapshot()
+    
+    print("📊 Usage Limits")
+    
+    if stored is None:  # no snapshot has been stored yet
+        print("  No usage data available yet. Send a request through ChatMock first.")
+        print()
+        return
+
+    update_time = _format_local_datetime(stored.captured_at)
+    print(f"Last updated: {update_time}")
+    print()
+
+    windows: list[tuple[str, str, RateLimitWindow]] = []  # (icon, label, window), in display order
+    if stored.snapshot.primary is not None:
+        windows.append(("⚡", "5 hour limit", stored.snapshot.primary))
+    if stored.snapshot.secondary is not None:
+        windows.append(("📅", "Weekly limit", stored.snapshot.secondary))
+
+    if not windows:  # a snapshot exists but carries neither window
+        print("  Usage data was captured but no limit windows were provided.")
+        print()
+        return
+
+    for i, (icon_label, desc, window) in enumerate(windows):
+        if i > 0:  # blank line between windows, none before the first
+            print()
+        
+        percent_used = _clamp_percent(window.used_percent)  # bad or out-of-range values end up within 0-100
+        remaining = max(0.0, 100.0 - percent_used)
+        color = _get_usage_color(percent_used)
+        reset = _reset_color()
+        
+        progress = _render_progress_bar(percent_used)
+        usage_text = f"{percent_used:5.1f}% used"
+        remaining_text = f"{remaining:5.1f}% left"
+        
+        print(f"{icon_label} {desc}")
+        print(f"{color}{progress}{reset} {color}{usage_text}{reset} | {remaining_text}")  # only bar and "used" are coloured
+        
+        reset_in = _format_reset_duration(window.resets_in_seconds)
+        reset_at = compute_reset_at(stored.captured_at, window)
+        
+        if reset_in and reset_at:  # print whichever of the two reset facts is available
+            reset_at_str = _format_local_datetime(reset_at)
+            print(f"    ⏳ Resets in: {reset_in} at {reset_at_str}")
+        elif reset_in:
+            print(f"    ⏳ Resets in: {reset_in}")
+        elif reset_at:
+            reset_at_str = _format_local_datetime(reset_at)
+            print(f"    ⏳ Resets at: {reset_at_str}")
+
+    print()
+
+def cmd_login(no_browser: bool, verbose: bool) -> int:  # Run the local OAuth login server; returns the process exit status.
+    home_dir = get_home_dir()
+    client_id = CLIENT_ID_DEFAULT
+    if not client_id:
+        eprint("ERROR: No OAuth client id configured. Set CHATGPT_LOCAL_CLIENT_ID.")
+        return 1
+
+    try:
+        bind_host = os.getenv("CHATGPT_LOCAL_LOGIN_BIND", "127.0.0.1")  # loopback unless overridden by env
+        httpd = OAuthHTTPServer((bind_host, REQUIRED_PORT), OAuthHandler, home_dir=home_dir, client_id=client_id, verbose=verbose)
+    except OSError as e:
+        eprint(f"ERROR: {e}")
+        if e.errno == errno.EADDRINUSE:
+            return 13  # distinct status when the callback port is already taken
+        return 1
+
+    auth_url = httpd.auth_url()
+    with httpd:
+        eprint(f"Starting local login server on {URL_BASE}")
+        if not no_browser:
+            try:
+                webbrowser.open(auth_url, new=1, autoraise=True)
+            except Exception as e:
+                eprint(f"Failed to open browser: {e}")
+        eprint(f"If your browser did not open, navigate to:\n{auth_url}")  # printed even when the browser opened
+
+        def _stdin_paste_worker() -> None:  # fallback path: finish the login from a redirect URL pasted on stdin
+            try:
+                eprint(
+                    "If the browser can't reach this machine, paste the full redirect URL here and press Enter (or leave blank to keep waiting):"
+                )
+                line = sys.stdin.readline().strip()
+                if not line:  # blank input: stop this worker, the HTTP callback path keeps waiting
+                    return
+                try:
+                    from urllib.parse import urlparse, parse_qs
+
+                    parsed = urlparse(line)
+                    params = parse_qs(parsed.query)
+                    code = (params.get("code") or [None])[0]
+                    state = (params.get("state") or [None])[0]
+                    if not code:
+                        eprint("Input did not contain an auth code. Ignoring.")
+                        return
+                    if state and state != httpd.state:  # NOTE(review): a URL with no state at all is accepted
+                        eprint("State mismatch. Ignoring pasted URL for safety.")
+                        return
+                    eprint("Received redirect URL. Completing login without callback…")
+                    bundle, _ = httpd.exchange_code(code)
+                    if httpd.persist_auth(bundle):
+                        httpd.exit_code = 0
+                        eprint("Login successful. Tokens saved.")
+                    else:
+                        eprint("ERROR: Unable to persist auth file.")
+                    httpd.shutdown()  # reached whether or not persisting succeeded
+                except Exception as exc:
+                    eprint(f"Failed to process pasted redirect URL: {exc}")
+            except Exception:
+                pass
+
+        try:
+            import threading
+
+            threading.Thread(target=_stdin_paste_worker, daemon=True).start()  # daemon: never blocks process exit
+        except Exception:
+            pass
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            eprint("\nKeyboard interrupt received, exiting.")
+        return httpd.exit_code  # the paste worker sets this to 0 on success
+
+
+def cmd_serve(  # Build the app from the parsed options and serve it; returns 0 once app.run() returns.
+    host: str,
+    port: int,
+    verbose: bool,
+    verbose_obfuscation: bool,
+    reasoning_effort: str,
+    reasoning_summary: str,
+    reasoning_compat: str,
+    fast_mode: bool,
+    debug_model: str | None,
+    expose_reasoning_models: bool,
+    default_web_search: bool,
+) -> int:
+    app = create_app(  # every option except host/port is forwarded unchanged
+        verbose=verbose,
+        verbose_obfuscation=verbose_obfuscation,
+        reasoning_effort=reasoning_effort,
+        reasoning_summary=reasoning_summary,
+        reasoning_compat=reasoning_compat,
+        fast_mode=fast_mode,
+        debug_model=debug_model,
+        expose_reasoning_models=expose_reasoning_models,
+        default_web_search=default_web_search,
+    )
+
+    app.run(host=host, use_reloader=False, port=port, threaded=True)  # reloader off, threaded request handling on
+    return 0
+
+
+def main() -> None:
+    """CLI entry point: parse ``login`` / ``serve`` / ``info`` and exit with that command's status."""
+    parser = argparse.ArgumentParser(description="ChatMock: login & OpenAI-compatible proxy")
+    sub = parser.add_subparsers(dest="command", required=True)
+    p_login = sub.add_parser("login", help="Authorize with ChatGPT and store tokens")
+    p_login.add_argument("--no-browser", action="store_true", help="Do not open the browser automatically")
+    p_login.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+
+    p_serve = sub.add_parser("serve", help="Run local OpenAI-compatible server")
+    p_serve.add_argument("--host", default="127.0.0.1")
+    p_serve.add_argument("--port", type=int, default=8000)
+    p_serve.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+    p_serve.add_argument(
+        "--verbose-obfuscation",
+        action="store_true",
+        help="Also dump raw SSE/obfuscation events (in addition to --verbose request/response logs).",
+    )
+    p_serve.add_argument(
+        "--debug-model",
+        dest="debug_model",
+        default=os.getenv("CHATGPT_LOCAL_DEBUG_MODEL"),
+        help="Forcibly override requested 'model' with this value",
+    )
+    p_serve.add_argument(
+        "--fast-mode",  # boolean env defaults below accept 1/true/yes/on, case-insensitively
+        action=argparse.BooleanOptionalAction,
+        default=(os.getenv("CHATGPT_LOCAL_FAST_MODE") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help="Enable GPT fast mode by default for supported models; request-level overrides still take precedence.",
+    )
+    p_serve.add_argument(
+        "--reasoning-effort",
+        choices=["none", "minimal", "low", "medium", "high", "xhigh"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(),
+        help="Reasoning effort level for Responses API (default: medium)",
+    )
+    p_serve.add_argument(
+        "--reasoning-summary",
+        choices=["auto", "concise", "detailed", "none"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_SUMMARY", "auto").lower(),
+        help="Reasoning summary verbosity (default: auto)",
+    )
+    p_serve.add_argument(
+        "--reasoning-compat",
+        choices=["legacy", "o3", "think-tags", "current"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_COMPAT", "think-tags").lower(),
+        help=(
+            "Compatibility mode for exposing reasoning to clients (legacy|o3|think-tags). "
+            "'current' is accepted as an alias for 'legacy'"
+        ),
+    )
+    p_serve.add_argument(
+        "--expose-reasoning-models",
+        action=argparse.BooleanOptionalAction,  # was store_true: an env-enabled default could not be turned off
+        default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Expose GPT-5 family reasoning effort variants (none|minimal|low|medium|high|xhigh where supported) "
+            "as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
+        ),
+    )
+    p_serve.add_argument(
+        "--enable-web-search",
+        action=argparse.BooleanOptionalAction,
+        default=(os.getenv("CHATGPT_LOCAL_ENABLE_WEB_SEARCH") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Enable default web_search tool when a request omits responses_tools (off by default). "
+            "Also configurable via CHATGPT_LOCAL_ENABLE_WEB_SEARCH."
+        ),
+    )
+
+    p_info = sub.add_parser("info", help="Print current stored tokens and derived account id")
+    p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents")
+
+    args = parser.parse_args()
+
+    if args.command == "login":
+        sys.exit(cmd_login(no_browser=args.no_browser, verbose=args.verbose))
+    elif args.command == "serve":
+        sys.exit(
+            cmd_serve(
+                host=args.host,
+                port=args.port,
+                verbose=args.verbose,
+                verbose_obfuscation=args.verbose_obfuscation,
+                reasoning_effort=args.reasoning_effort,
+                reasoning_summary=args.reasoning_summary,
+                reasoning_compat=args.reasoning_compat,
+                fast_mode=args.fast_mode,
+                debug_model=args.debug_model,
+                expose_reasoning_models=args.expose_reasoning_models,
+                default_web_search=args.enable_web_search,
+            )
+        )
+    elif args.command == "info":
+        auth = read_auth_file()
+        if getattr(args, "json", False):  # --json dumps the auth file contents and skips the summary
+            print(json.dumps(auth or {}, indent=2))
+            sys.exit(0)
+        access_token, account_id, id_token = load_chatgpt_tokens()
+        if not access_token or not id_token:  # signed out: usage limits are still printed
+            print("👤 Account")
+            print("  • Not signed in")
+            print("  • Run: python3 chatmock.py login")
+            print("")
+            _print_usage_limits_block()
+            sys.exit(0)
+
+        id_claims = parse_jwt_claims(id_token) or {}
+        access_claims = parse_jwt_claims(access_token) or {}
+
+        email = id_claims.get("email") or id_claims.get("preferred_username") or "<unknown>"
+        plan_raw = (access_claims.get("https://api.openai.com/auth") or {}).get("chatgpt_plan_type") or "unknown"
+        plan_map = {
+            "plus": "Plus",
+            "pro": "Pro",
+            "free": "Free",
+            "team": "Team",
+            "enterprise": "Enterprise",
+        }
+        plan = plan_map.get(str(plan_raw).lower(), str(plan_raw).title() if isinstance(plan_raw, str) else "Unknown")  # unmapped names are title-cased
+
+        print("👤 Account")
+        print("  • Signed in with ChatGPT")
+        print(f"  • Login: {email}")
+        print(f"  • Plan: {plan}")
+        if account_id:
+            print(f"  • Account ID: {account_id}")
+        print("")
+        _print_usage_limits_block()
+        sys.exit(0)
+    else:
+        parser.error("Unknown command")
+
+
+if __name__ == "__main__":  # allows running this module directly as a script
+    main()
diff --git a/chatmock/config.py b/chatmock/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc5ca817b47d23d86c2d32e2e5492fbe11ce9463
--- /dev/null
+++ b/chatmock/config.py
@@ -0,0 +1,48 @@
+from __future__ import annotations
+
+import os
+import sys
+from pathlib import Path
+
+
+# OAuth client id and issuer. Each can be overridden through the environment;
+# an unset or empty variable falls back to the built-in default.
+CLIENT_ID_DEFAULT = os.getenv("CHATGPT_LOCAL_CLIENT_ID") or "app_EMoamEEZ73f0CkXaXp7hrann"
+OAUTH_ISSUER_DEFAULT = os.getenv("CHATGPT_LOCAL_ISSUER") or "https://auth.openai.com"
+# Token endpoint derived from the issuer resolved above.
+OAUTH_TOKEN_URL = f"{OAUTH_ISSUER_DEFAULT}/oauth/token"
+
+# Fixed upstream endpoint URL; unlike the values above it has no environment override.
+CHATGPT_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses"
+
+
+def _read_prompt_text(filename: str) -> str | None:
+    """Return the text of the first non-blank copy of ``filename`` that can be read.
+
+    Locations are tried in this order: the directory above this package, the
+    package directory itself, ``sys._MEIPASS`` (only when that attribute is
+    set), and finally the current working directory. A location that is
+    missing, unreadable, or contains only whitespace is skipped.
+
+    Returns:
+        The file contents, or ``None`` when no location yields usable text.
+    """
+    package_dir = Path(__file__).parent
+    search_paths = [package_dir.parent / filename, package_dir / filename]
+    bundle_root = getattr(sys, "_MEIPASS", None)
+    if bundle_root:
+        search_paths.append(Path(bundle_root) / filename)
+    search_paths.append(Path.cwd() / filename)
+
+    for path in search_paths:
+        try:
+            if not path.exists():
+                continue
+            text = path.read_text(encoding="utf-8")
+        except Exception:
+            # Best effort: an unreadable candidate just means "try the next one".
+            continue
+        if text.strip():
+            return text
+    return None
+
+
+def read_base_instructions() -> str:
+    """Load ``prompt.md`` via ``_read_prompt_text``.
+
+    Raises:
+        FileNotFoundError: if no readable, non-blank ``prompt.md`` is found.
+    """
+    text = _read_prompt_text("prompt.md")
+    if text is not None:
+        return text
+    raise FileNotFoundError("Failed to read prompt.md; expected adjacent to package or CWD.")
+
+
+def read_gpt5_codex_instructions(fallback: str) -> str:
+    """Load ``prompt_gpt5_codex.md``, returning ``fallback`` when it is missing or blank."""
+    text = _read_prompt_text("prompt_gpt5_codex.md")
+    if isinstance(text, str) and text.strip():
+        return text
+    return fallback
+
+
+# Both prompts are loaded once, at import time. Importing this module therefore
+# raises FileNotFoundError when prompt.md cannot be found; the codex prompt
+# falls back to the base instructions instead of failing.
+BASE_INSTRUCTIONS = read_base_instructions()
+GPT5_CODEX_INSTRUCTIONS = read_gpt5_codex_instructions(BASE_INSTRUCTIONS)
diff --git a/chatmock/fast_mode.py b/chatmock/fast_mode.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dbb557c41fcc23b83e6cc4c42284122b6b90d87
--- /dev/null
+++ b/chatmock/fast_mode.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Any
+
+from .model_registry import normalize_model_name
+
+
+# Normalized model ids for which the "priority" service tier is accepted.
+# Checked against the output of normalize_model_name().
+PRIORITY_SUPPORTED_MODELS = frozenset(
+    (
+        "gpt-5.4",
+        "gpt-5.2",
+        "gpt-5.1",
+        "gpt-5",
+        "gpt-5.1-codex",
+        "gpt-5-codex",
+    )
+)
+
+# Lower-cased spellings recognised by parse_optional_bool().
+_TRUE_STRINGS = {"1", "true", "yes", "on"}
+_FALSE_STRINGS = {"0", "false", "no", "off"}
+
+
+def parse_optional_bool(value: Any) -> bool | None:
+    """Interpret ``value`` as a tri-state boolean.
+
+    Real booleans pass through unchanged. Strings are trimmed, lower-cased and
+    matched against the recognised true/false spellings. Anything else,
+    including unrecognised strings, yields ``None``.
+    """
+    if isinstance(value, bool):
+        return value
+    if not isinstance(value, str):
+        return None
+    token = value.strip().lower()
+    if token in _TRUE_STRINGS:
+        return True
+    return False if token in _FALSE_STRINGS else None
+
+
+def supports_priority_service_tier(model: str | None) -> bool:
+    """Return True when the normalized form of ``model`` is in ``PRIORITY_SUPPORTED_MODELS``."""
+    canonical = normalize_model_name(model)
+    return canonical in PRIORITY_SUPPORTED_MODELS
+
+
+# Immutable result of resolve_service_tier().
+@dataclass(frozen=True)
+class ServiceTierResolution:
+    # Tier selected for the request, or None when no tier applies.
+    service_tier: str | None
+    # Set when the request itself asked for priority on an unsupported model.
+    error_message: str | None = None
+    # Set when only the server default asked for priority on an unsupported model.
+    warning_message: str | None = None
+    # True when the tier decision came from the server-wide fast-mode default.
+    used_server_default: bool = False
+
+
+def resolve_service_tier(
+    model: str | None,
+    *,
+    request_fast_mode: Any = None,
+    request_service_tier: Any = None,
+    server_fast_mode: bool = False,
+) -> ServiceTierResolution:
+    """Decide which service tier applies to a request.
+
+    Precedence, highest first:
+      1. ``request_fast_mode`` when it parses as a boolean (True -> "priority",
+         False -> no tier);
+      2. a non-blank string ``request_service_tier`` (trimmed, lower-cased);
+      3. the ``server_fast_mode`` default (-> "priority").
+
+    When the outcome is "priority" but ``model`` does not support it, the tier
+    is dropped. The explanation is reported as ``error_message`` if the request
+    asked for it (cases 1 and 2) and as ``warning_message`` if only the server
+    default did (case 3).
+    """
+    fast_flag = parse_optional_bool(request_fast_mode)
+    requested_tier = (
+        request_service_tier.strip().lower() if isinstance(request_service_tier, str) else ""
+    )
+
+    if fast_flag is not None:
+        chosen, from_request, from_server = ("priority" if fast_flag else None), True, False
+    elif requested_tier:
+        chosen, from_request, from_server = requested_tier, True, False
+    elif server_fast_mode:
+        chosen, from_request, from_server = "priority", False, True
+    else:
+        chosen, from_request, from_server = None, False, False
+
+    # Anything other than an unsupported "priority" passes straight through.
+    if chosen != "priority" or supports_priority_service_tier(model):
+        return ServiceTierResolution(
+            service_tier=chosen,
+            used_server_default=from_server,
+        )
+
+    normalized = normalize_model_name(model)
+    message = (
+        f"Fast mode is not supported for model '{normalized}'. "
+        "Use a supported GPT-5 priority-processing model or disable fast mode for this request."
+    )
+    if from_request:
+        return ServiceTierResolution(
+            service_tier=None,
+            error_message=message,
+            used_server_default=from_server,
+        )
+    return ServiceTierResolution(
+        service_tier=None,
+        warning_message=message,
+        used_server_default=from_server,
+    )
diff --git a/chatmock/http.py b/chatmock/http.py
new file mode 100644
index 0000000000000000000000000000000000000000..567093a4a409bfc27fb85311e533879a0e5e783a
--- /dev/null
+++ b/chatmock/http.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from flask import Response, jsonify, request
+
+
+def build_cors_headers() -> dict:
+    """Build CORS response headers for the current Flask request.
+
+    The request's ``Origin`` is echoed back ("*" when absent), and any
+    ``Access-Control-Request-Headers`` value is echoed as the allowed headers,
+    with a fixed default list otherwise.
+    """
+    incoming = request.headers
+    requested = incoming.get("Access-Control-Request-Headers")
+    headers = {
+        "Access-Control-Allow-Origin": incoming.get("Origin", "*"),
+        "Access-Control-Allow-Methods": "POST, GET, OPTIONS",
+    }
+    headers["Access-Control-Allow-Headers"] = (
+        requested if requested else "Authorization, Content-Type, Accept"
+    )
+    headers["Access-Control-Max-Age"] = "86400"
+    return headers
+
+
+def json_error(message: str, status: int = 400) -> Response:
+    """Return a JSON error response of the form ``{"error": {"message": ...}}``.
+
+    The response carries ``status`` and the CORS headers from
+    ``build_cors_headers``; headers already present on the response are kept.
+    """
+    payload = jsonify({"error": {"message": message}})
+    response: Response = Response(response=payload.response, status=status, mimetype="application/json")
+    cors = build_cors_headers()
+    for name in cors:
+        response.headers.setdefault(name, cors[name])
+    return response
+
diff --git a/chatmock/limits.py b/chatmock/limits.py
new file mode 100644
index 0000000000000000000000000000000000000000..862076c5ec4fafa65e1f088db44ce7bf03d63fde
--- /dev/null
+++ b/chatmock/limits.py
@@ -0,0 +1,200 @@
+from __future__ import annotations
+
+import json
+import os
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from typing import Any, Mapping, Optional
+
+from .utils import get_home_dir
+
+# Response header names read by parse_rate_limit_headers(), one triple
+# (used percent, window length, time until reset) per rate-limit window.
+_PRIMARY_USED = "x-codex-primary-used-percent"
+_PRIMARY_WINDOW = "x-codex-primary-window-minutes"
+_PRIMARY_RESET = "x-codex-primary-reset-after-seconds"
+_SECONDARY_USED = "x-codex-secondary-used-percent"
+_SECONDARY_WINDOW = "x-codex-secondary-window-minutes"
+_SECONDARY_RESET = "x-codex-secondary-reset-after-seconds"
+
+# File name, inside the home directory, where the latest snapshot is persisted.
+_LIMITS_FILENAME = "usage_limits.json"
+
+
+# One rate-limit window as reported by the upstream response headers.
+@dataclass
+class RateLimitWindow:
+    # Value of the "*-used-percent" header; the only mandatory field.
+    used_percent: float
+    # Value of the "*-window-minutes" header, or None if absent/unparseable.
+    window_minutes: Optional[int]
+    # Value of the "*-reset-after-seconds" header, or None if absent/unparseable.
+    resets_in_seconds: Optional[int]
+
+
+# The primary and secondary windows captured from one response; either may be None.
+@dataclass
+class RateLimitSnapshot:
+    primary: Optional[RateLimitWindow]
+    secondary: Optional[RateLimitWindow]
+
+
+# A snapshot loaded back from disk together with the time it was captured.
+@dataclass
+class StoredRateLimitSnapshot:
+    # Timezone-aware capture time (naive stored values are read as UTC).
+    captured_at: datetime
+    snapshot: RateLimitSnapshot
+
+
+def _parse_float(value: Any) -> Optional[float]:
+    """Convert ``value`` to a finite float, or return ``None``.
+
+    Accepts ints, floats and anything whose ``str()`` form parses as a float
+    after trimming whitespace. Returns ``None`` for ``None``, booleans, blank
+    strings, unparseable input, and non-finite results (NaN, +/-inf).
+
+    The finiteness check applies to numeric inputs as well as strings, and
+    booleans are rejected, matching ``_parse_int``.
+    """
+    if value is None or isinstance(value, bool):
+        return None
+    try:
+        if isinstance(value, (int, float)):
+            parsed = float(value)
+        else:
+            text = str(value).strip()
+            if not text:
+                return None
+            parsed = float(text)
+    except Exception:
+        # Includes ValueError (bad text) and OverflowError (huge ints).
+        return None
+    # NaN is the only float that differs from itself.
+    if parsed != parsed or parsed in (float("inf"), float("-inf")):
+        return None
+    return parsed
+
+
+def _parse_int(value: Any) -> Optional[int]:
+    """Convert ``value`` to an int, or return ``None``.
+
+    Ints pass through unchanged. Other values are parsed from their trimmed
+    ``str()`` form, so a float such as ``3.0`` is rejected. ``None``, booleans,
+    blank strings and unparseable input yield ``None``.
+    """
+    if value is None or isinstance(value, bool):
+        return None
+    if isinstance(value, int):
+        return value
+    try:
+        text = str(value).strip()
+        return int(text) if text else None
+    except Exception:
+        return None
+
+
+def _parse_window(headers: Mapping[str, Any], used_key: str, window_key: str, reset_key: str) -> Optional[RateLimitWindow]:
+    """Build one ``RateLimitWindow`` from three named headers.
+
+    The used-percent header is mandatory: when it is missing or unparseable
+    the whole window is ``None``. The other two fields may individually be
+    ``None``.
+    """
+    used = _parse_float(headers.get(used_key))
+    if used is None:
+        return None
+    return RateLimitWindow(
+        used_percent=used,
+        window_minutes=_parse_int(headers.get(window_key)),
+        resets_in_seconds=_parse_int(headers.get(reset_key)),
+    )
+
+
+def parse_rate_limit_headers(headers: Mapping[str, Any]) -> Optional[RateLimitSnapshot]:
+    """Extract the primary and secondary windows from response headers.
+
+    Returns ``None`` when neither window is present or when reading the
+    headers fails for any reason.
+    """
+    try:
+        windows = {
+            "primary": _parse_window(headers, _PRIMARY_USED, _PRIMARY_WINDOW, _PRIMARY_RESET),
+            "secondary": _parse_window(headers, _SECONDARY_USED, _SECONDARY_WINDOW, _SECONDARY_RESET),
+        }
+    except Exception:
+        return None
+    if all(window is None for window in windows.values()):
+        return None
+    return RateLimitSnapshot(**windows)
+
+
+def _limits_path() -> str:
+    """Return the path of the persisted snapshot file inside the home directory."""
+    return os.path.join(get_home_dir(), _LIMITS_FILENAME)
+
+
+def store_rate_limit_snapshot(snapshot: RateLimitSnapshot, captured_at: Optional[datetime] = None) -> None:
+    """Persist ``snapshot`` as JSON in the home directory (best effort).
+
+    Args:
+        snapshot: windows to store; a window that is ``None`` is omitted.
+        captured_at: capture time; defaults to the current UTC time.
+
+    Any error while creating the directory or writing the file is swallowed,
+    so this function never raises for persistence problems.
+    """
+    stamp = captured_at or datetime.now(timezone.utc)
+    try:
+        os.makedirs(get_home_dir(), exist_ok=True)
+        payload: dict[str, Any] = {"captured_at": stamp.isoformat()}
+        for label in ("primary", "secondary"):
+            window = getattr(snapshot, label)
+            if window:
+                payload[label] = {
+                    "used_percent": window.used_percent,
+                    "window_minutes": window.window_minutes,
+                    "resets_in_seconds": window.resets_in_seconds,
+                }
+        with open(_limits_path(), "w", encoding="utf-8") as fp:
+            # Restrict the file to its owner where the platform offers fchmod.
+            restrict = getattr(os, "fchmod", None)
+            if restrict is not None:
+                try:
+                    restrict(fp.fileno(), 0o600)
+                except OSError:
+                    pass
+            json.dump(payload, fp, indent=2)
+    except Exception:
+        # Silently ignore persistence errors.
+        pass
+
+
+def load_rate_limit_snapshot() -> Optional[StoredRateLimitSnapshot]:
+    """Load the persisted rate-limit snapshot, if a usable one exists.
+
+    Returns:
+        The stored snapshot, or ``None`` when the file is missing, unreadable,
+        not valid JSON, not a JSON object, lacks a parseable ``captured_at``,
+        or contains neither a primary nor a secondary window.
+    """
+    try:
+        with open(_limits_path(), "r", encoding="utf-8") as fp:
+            raw = json.load(fp)
+    except Exception:
+        # Missing file, permission problem or malformed JSON: nothing to load.
+        return None
+
+    # Valid JSON is not necessarily an object; a list, number or null has no
+    # .get() and must be treated as "no snapshot" rather than crash the caller.
+    if not isinstance(raw, dict):
+        return None
+
+    captured_at = _parse_datetime(raw.get("captured_at"))
+    if captured_at is None:
+        return None
+
+    snapshot = RateLimitSnapshot(
+        primary=_dict_to_window(raw.get("primary")),
+        secondary=_dict_to_window(raw.get("secondary")),
+    )
+    if snapshot.primary is None and snapshot.secondary is None:
+        return None
+    return StoredRateLimitSnapshot(captured_at=captured_at, snapshot=snapshot)
+
+
+def _parse_datetime(value: Any) -> Optional[datetime]:
+    """Parse an ISO-8601 string into a timezone-aware ``datetime``.
+
+    A trailing "Z" is read as UTC, and a value without any offset is assumed
+    to be UTC. Non-strings, blank strings and unparseable text yield ``None``.
+    """
+    if not isinstance(value, str):
+        return None
+    text = value.strip()
+    if not text:
+        return None
+    iso_text = f"{text[:-1]}+00:00" if text.endswith("Z") else text
+    try:
+        parsed = datetime.fromisoformat(iso_text)
+    except ValueError:
+        return None
+    return parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=timezone.utc)
+
+
+def _dict_to_window(value: Any) -> Optional[RateLimitWindow]:
+    """Rebuild a ``RateLimitWindow`` from its stored JSON object.
+
+    Returns ``None`` when ``value`` is not a dict or has no parseable
+    ``used_percent``.
+    """
+    if isinstance(value, dict):
+        used = _parse_float(value.get("used_percent"))
+        if used is not None:
+            return RateLimitWindow(
+                used_percent=used,
+                window_minutes=_parse_int(value.get("window_minutes")),
+                resets_in_seconds=_parse_int(value.get("resets_in_seconds")),
+            )
+    return None
+
+
+def record_rate_limits_from_response(response: Any) -> None:
+    """Persist the rate-limit headers of ``response``, if it carries any.
+
+    Does nothing when ``response`` is ``None``, has no ``headers`` attribute,
+    or its headers contain no rate-limit window.
+    """
+    headers = getattr(response, "headers", None) if response is not None else None
+    if headers is None:
+        return
+    snapshot = parse_rate_limit_headers(headers)
+    if snapshot is not None:
+        store_rate_limit_snapshot(snapshot)
+
+
+def compute_reset_at(captured_at: datetime, window: RateLimitWindow) -> Optional[datetime]:
+    """Return the moment ``window`` resets, measured from ``captured_at``.
+
+    Returns ``None`` when the window has no reset interval or the addition
+    fails (for example on overflow).
+    """
+    seconds = window.resets_in_seconds
+    if seconds is None:
+        return None
+    try:
+        offset = timedelta(seconds=int(seconds))
+        return captured_at + offset
+    except Exception:
+        return None
+
diff --git a/chatmock/model_registry.py b/chatmock/model_registry.py
new file mode 100644
index 0000000000000000000000000000000000000000..04e1314a92bca4145f461660a2e708f1695802e8
--- /dev/null
+++ b/chatmock/model_registry.py
@@ -0,0 +1,205 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Iterable
+
+
+# Every reasoning-effort label recognised in model-name suffixes.
+ALL_REASONING_EFFORTS = ("none", "minimal", "low", "medium", "high", "xhigh")
+# Same labels as a set; used as the allowed efforts for models without a narrower list.
+DEFAULT_REASONING_EFFORTS = frozenset(ALL_REASONING_EFFORTS)
+
+
+# Immutable description of one model exposed by the registry.
+@dataclass(frozen=True)
+class ModelSpec:
+    # Id returned by list_public_models().
+    public_id: str
+    # Id returned by normalize_model_name() for this model.
+    upstream_id: str
+    # Additional accepted spellings that resolve to this model.
+    aliases: tuple[str, ...]
+    # Reasoning efforts reported by allowed_efforts_for_model().
+    allowed_efforts: frozenset[str]
+    # Efforts listed as "<public_id>-<effort>" when reasoning variants are exposed.
+    variant_efforts: tuple[str, ...]
+    # Value returned by uses_codex_instructions() for this model.
+    uses_codex_instructions: bool = False
+
+
+# Registry of known models. Tuple order is the order in which
+# list_public_models() reports them.
+_MODEL_SPECS = (
+    ModelSpec(
+        public_id="gpt-5",
+        upstream_id="gpt-5",
+        aliases=("gpt5", "gpt-5-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low", "minimal"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.1",
+        upstream_id="gpt-5.1",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.2",
+        upstream_id="gpt-5.2",
+        aliases=("gpt5.2", "gpt-5.2-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.4",
+        upstream_id="gpt-5.4",
+        aliases=("gpt5.4", "gpt-5.4-latest"),
+        allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low", "none"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.5",
+        upstream_id="gpt-5.5",
+        aliases=("gpt5.5", "gpt-5.5-latest"),
+        allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low", "none"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.4-mini",
+        upstream_id="gpt-5.4-mini",
+        aliases=("gpt5.4-mini", "gpt-5.4-mini-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.3-codex",
+        upstream_id="gpt-5.3-codex",
+        aliases=("gpt5.3-codex", "gpt-5.3-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.3-codex-spark",
+        upstream_id="gpt-5.3-codex-spark",
+        aliases=("gpt5.3-codex-spark", "gpt-5.3-codex-spark-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5-codex",
+        upstream_id="gpt-5-codex",
+        aliases=("gpt5-codex", "gpt-5-codex-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.2-codex",
+        upstream_id="gpt-5.2-codex",
+        aliases=("gpt5.2-codex", "gpt-5.2-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex",
+        upstream_id="gpt-5.1-codex",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-max",
+        upstream_id="gpt-5.1-codex-max",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-mini",
+        upstream_id="gpt-5.1-codex-mini",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=(),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="codex-mini",
+        upstream_id="codex-mini-latest",
+        aliases=("codex", "codex-mini-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=(),
+        uses_codex_instructions=True,
+    ),
+)
+
+# Lookup tables derived from the registry: upstream id -> spec, and every
+# accepted name (public id or alias) -> upstream id.
+_SPECS_BY_UPSTREAM = {spec.upstream_id: spec for spec in _MODEL_SPECS}
+_ALIASES = {}
+# Later specs overwrite earlier ones if two of them claim the same name.
+for _spec in _MODEL_SPECS:
+    _ALIASES[_spec.public_id] = _spec.upstream_id
+    for _alias in _spec.aliases:
+        _ALIASES[_alias] = _spec.upstream_id
+
+
+def _strip_model_name(model: str | None) -> tuple[str, str | None]:
+    """Split a requested model name into ``(base_name, effort)``.
+
+    The name is trimmed and lower-cased first. An effort is recognised either
+    after the last ":" (``name:high``) or as a ``-effort`` / ``_effort``
+    suffix. When no effort is found the whole name is the base and the effort
+    is ``None``. Non-string or blank input yields ``("", None)``.
+    """
+    if not isinstance(model, str):
+        return "", None
+    name = model.strip().lower()
+    if not name:
+        return "", None
+
+    head, colon, tail = name.rpartition(":")
+    if colon and tail in DEFAULT_REASONING_EFFORTS:
+        return head, tail
+
+    # "-" suffixes are tried before "_" ones, each in ALL_REASONING_EFFORTS order.
+    suffixes = (
+        (f"{separator}{effort}", effort)
+        for separator in ("-", "_")
+        for effort in ALL_REASONING_EFFORTS
+    )
+    for suffix, effort in suffixes:
+        if name.endswith(suffix):
+            return name[: -len(suffix)], effort
+    return name, None
+
+
+def model_spec_for_name(model: str | None) -> ModelSpec | None:
+    """Return the registry entry for ``model`` (effort suffix ignored), or ``None`` if unknown."""
+    base_name = _strip_model_name(model)[0]
+    canonical = _ALIASES.get(base_name)
+    return _SPECS_BY_UPSTREAM.get(canonical) if canonical else None
+
+
+def normalize_model_name(model: str | None, debug_model: str | None = None) -> str:
+    """Resolve a requested model name to the id that is used for the request.
+
+    A non-blank ``debug_model`` always wins (trimmed, otherwise verbatim).
+    Known names and aliases map to their registry upstream id. Unknown names
+    are returned with any effort suffix removed, and blank or missing input
+    falls back to "gpt-5.4".
+    """
+    if isinstance(debug_model, str):
+        forced = debug_model.strip()
+        if forced:
+            return forced
+    known = model_spec_for_name(model)
+    if known is None:
+        return _strip_model_name(model)[0] or "gpt-5.4"
+    return known.upstream_id
+
+
+def uses_codex_instructions(model: str | None) -> bool:
+    """Report whether ``model`` uses the codex instructions.
+
+    Registered models answer from their spec. Unregistered names qualify
+    whenever they contain "codex" (case-insensitive).
+    """
+    known = model_spec_for_name(model)
+    if known is None:
+        return "codex" in ((model or "").strip().lower())
+    return known.uses_codex_instructions
+
+
+def allowed_efforts_for_model(model: str | None) -> frozenset[str]:
+    """Return the reasoning efforts ``model`` accepts; unknown models accept every effort."""
+    known = model_spec_for_name(model)
+    if known is None:
+        return DEFAULT_REASONING_EFFORTS
+    return known.allowed_efforts
+
+
+def extract_reasoning_from_model_name(model: str | None) -> dict[str, str] | None:
+    """Return ``{"effort": ...}`` when the model name carries an effort suffix, else ``None``."""
+    effort = _strip_model_name(model)[1]
+    return {"effort": effort} if effort else None
+
+
+def list_public_models(expose_reasoning_models: bool = False) -> list[str]:
+    """List public model ids in registry order.
+
+    With ``expose_reasoning_models`` each id is followed by its
+    ``<id>-<effort>`` variants, in the order given by the spec.
+    """
+    listed: list[str] = []
+    for spec in _MODEL_SPECS:
+        variants = spec.variant_efforts if expose_reasoning_models else ()
+        listed += [spec.public_id, *(f"{spec.public_id}-{effort}" for effort in variants)]
+    return listed
+
+
+def iter_public_models() -> Iterable[ModelSpec]:
+    # Returns the registry tuple itself (not a copy), in declaration order.
+    return _MODEL_SPECS
diff --git a/chatmock/models.py b/chatmock/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb19ac49773ed42a158d7253646c58ae1b7739d9
--- /dev/null
+++ b/chatmock/models.py
@@ -0,0 +1,26 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Optional
+
+
+# Tokens for one signed-in account, as persisted under "tokens" in the auth file.
+@dataclass
+class TokenData:
+    id_token: str
+    access_token: str
+    refresh_token: str
+    # Value of the "chatgpt_account_id" claim taken from the id token ("" if absent).
+    account_id: str
+
+
+# Everything written to the auth file after a successful login.
+@dataclass
+class AuthBundle:
+    # Exchanged API key, or None when no key was obtained.
+    api_key: Optional[str]
+    token_data: TokenData
+    # UTC timestamp string recording when the tokens were obtained.
+    last_refresh: str
+
+
+# PKCE pair for one login attempt: the challenge goes into the authorize URL,
+# the verifier is sent with the token exchange.
+@dataclass
+class PkceCodes:
+    code_verifier: str
+    code_challenge: str
+
diff --git a/chatmock/oauth.py b/chatmock/oauth.py
new file mode 100644
index 0000000000000000000000000000000000000000..2659498abf4dc655bbca656b62841319791b1ceb
--- /dev/null
+++ b/chatmock/oauth.py
@@ -0,0 +1,340 @@
+from __future__ import annotations
+
+import datetime
+import ssl
+import http.server
+import json
+import secrets
+import threading
+import time
+import urllib.parse
+import urllib.request
+from typing import Any, Dict, Tuple
+
+import certifi
+
+from .config import OAUTH_ISSUER_DEFAULT
+from .models import AuthBundle, PkceCodes, TokenData
+from .utils import eprint, generate_pkce, parse_jwt_claims, write_auth_file
+
+
+# Port used to build URL_BASE, the base of the local success-page URLs.
+REQUIRED_PORT = 1455
+URL_BASE = f"http://localhost:{REQUIRED_PORT}"
+# Issuer used for the authorize and token endpoints.
+DEFAULT_ISSUER = OAUTH_ISSUER_DEFAULT
+
+
+# Page shown in the browser once login has completed.
+LOGIN_SUCCESS_HTML = """<!DOCTYPE html>
+<html lang=\"en\">
+  <head>
+    <meta charset=\"utf-8\" />
+    <title>Login successful</title>
+  </head>
+  <body>
+    <div style=\"max-width: 640px; margin: 80px auto; font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;\"> 
+      <h1>Login successful</h1>
+      <p>You can now close this window and return to the terminal and run <code>python3 chatmock.py serve</code> to start the server.</p>
+    </div>
+  </body>
+  </html>
+"""
+
+# TLS context for the token-endpoint calls, using certifi's CA bundle.
+_SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
+
+class OAuthHTTPServer(http.server.HTTPServer):
+    """Local HTTP server that drives one OAuth authorization-code + PKCE login.
+
+    It holds the per-login secrets (PKCE pair and ``state``), builds the
+    authorize URL, exchanges the returned code for tokens and persists them.
+    ``exit_code`` starts at 1 and is set to 0 by the request handler once the
+    auth file has been written.
+    """
+
+    def __init__(
+        self,
+        server_address: tuple[str, int],
+        request_handler_class: type[http.server.BaseHTTPRequestHandler],
+        *,
+        home_dir: str,
+        client_id: str,
+        verbose: bool = False,
+    ) -> None:
+        """Bind the socket immediately and generate fresh PKCE codes and state."""
+        super().__init__(server_address, request_handler_class, bind_and_activate=True)
+        self.exit_code = 1
+        self.home_dir = home_dir
+        self.verbose = verbose
+        self.issuer = DEFAULT_ISSUER
+        self.token_endpoint = f"{self.issuer}/oauth/token"
+        self.client_id = client_id
+        # The redirect URI uses the port that was requested for binding.
+        port = server_address[1]
+        self.redirect_uri = f"http://localhost:{port}/auth/callback"
+        self.pkce = generate_pkce()
+        # Random per-login value sent as the OAuth "state" parameter.
+        self.state = secrets.token_hex(32)
+
+    def auth_url(self) -> str:
+        """Return the issuer's authorize URL for this login attempt."""
+        params = {
+            "response_type": "code",
+            "client_id": self.client_id,
+            "redirect_uri": self.redirect_uri,
+            "scope": "openid profile email offline_access",
+            "code_challenge": self.pkce.code_challenge,
+            "code_challenge_method": "S256",
+            "id_token_add_organizations": "true",
+            "codex_cli_simplified_flow": "true",
+            "state": self.state,
+        }
+        return f"{self.issuer}/oauth/authorize?" + urllib.parse.urlencode(params)
+
+    def exchange_code(self, code: str) -> tuple[AuthBundle, str]:
+        """Exchange an authorization code for tokens.
+
+        Performs a blocking POST to the token endpoint. Network and JSON
+        errors propagate to the caller.
+
+        Returns:
+            ``(bundle, success_url)``, where ``success_url`` falls back to
+            ``{URL_BASE}/success`` when ``maybe_obtain_api_key`` gives none.
+        """
+        data = urllib.parse.urlencode(
+            {
+                "grant_type": "authorization_code",
+                "code": code,
+                "redirect_uri": self.redirect_uri,
+                "client_id": self.client_id,
+                "code_verifier": self.pkce.code_verifier,
+            }
+        ).encode()
+
+        with urllib.request.urlopen(
+            urllib.request.Request(
+                self.token_endpoint,
+                data=data,
+                method="POST",
+                headers={"Content-Type": "application/x-www-form-urlencoded"},
+            ),
+            context=_SSL_CONTEXT,
+        ) as resp:
+            payload = json.loads(resp.read().decode())
+
+        # Missing tokens are stored as empty strings rather than raising.
+        id_token = payload.get("id_token", "")
+        access_token = payload.get("access_token", "")
+        refresh_token = payload.get("refresh_token", "")
+
+        id_token_claims = parse_jwt_claims(id_token)
+        access_token_claims = parse_jwt_claims(access_token)
+
+        # NOTE(review): if the namespaced claim is present but not a dict
+        # (e.g. null), the .get() below raises AttributeError.
+        auth_claims = (id_token_claims or {}).get("https://api.openai.com/auth", {})
+        chatgpt_account_id = auth_claims.get("chatgpt_account_id", "")
+
+        token_data = TokenData(
+            id_token=id_token,
+            access_token=access_token,
+            refresh_token=refresh_token,
+            account_id=chatgpt_account_id,
+        )
+
+        api_key, success_url = self.maybe_obtain_api_key(
+            id_token_claims or {}, access_token_claims or {}, token_data
+        )
+
+        # ISO-8601 UTC timestamp with a "Z" suffix instead of "+00:00".
+        last_refresh_str = (
+            datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
+        )
+        bundle = AuthBundle(api_key=api_key, token_data=token_data, last_refresh=last_refresh_str)
+        return bundle, success_url or f"{URL_BASE}/success"
+
+    def maybe_obtain_api_key(
+        self,
+        token_claims: Dict[str, Any],
+        access_claims: Dict[str, Any],
+        token_data: TokenData,
+    ) -> tuple[str | None, str | None]:
+        """Try to exchange the id token for an API key.
+
+        When ``token_claims`` lacks ``organization_id`` or ``project_id`` no
+        exchange is attempted and the key is ``None``. Otherwise a
+        token-exchange POST is made (errors propagate).
+
+        Returns:
+            ``(api_key_or_None, success_url)``.
+        """
+        # NOTE(review): exchange_code passes the complete claim sets here, so
+        # these ids (and chatgpt_plan_type below) are looked up at the top
+        # level of the claims; confirm that is where the issuer places them.
+        org_id = token_claims.get("organization_id")
+        project_id = token_claims.get("project_id")
+        if not org_id or not project_id:
+            query = {
+                "id_token": token_data.id_token,
+                "needs_setup": "false",
+                "org_id": org_id or "",
+                "project_id": project_id or "",
+                "plan_type": access_claims.get("chatgpt_plan_type"),
+                "platform_url": "https://platform.openai.com",
+            }
+            return None, f"{URL_BASE}/success?{urllib.parse.urlencode(query)}"
+
+        today = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
+        exchange_data = urllib.parse.urlencode(
+            {
+                "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
+                "client_id": self.client_id,
+                "requested_token": "openai-api-key",
+                "subject_token": token_data.id_token,
+                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
+                "name": f"ChatMock [auto-generated] ({today})",
+            }
+        ).encode()
+
+        with urllib.request.urlopen(
+            urllib.request.Request(
+                self.token_endpoint,
+                data=exchange_data,
+                method="POST",
+                headers={"Content-Type": "application/x-www-form-urlencoded"},
+            ),
+            context=_SSL_CONTEXT,
+        ) as resp:
+            exchange_payload = json.loads(resp.read().decode())
+            exchanged_access_token = exchange_payload.get("access_token")
+
+        chatgpt_plan_type = access_claims.get("chatgpt_plan_type")
+        # NOTE(review): this URL carries the access and refresh tokens in its
+        # query string; confirm no caller ever redirects a browser to it.
+        success_url_query = {
+            "id_token": token_data.id_token,
+            "access_token": token_data.access_token,
+            "refresh_token": token_data.refresh_token,
+            "exchanged_access_token": exchanged_access_token,
+            "org_id": org_id,
+            "project_id": project_id,
+            "plan_type": chatgpt_plan_type,
+            "platform_url": "https://platform.openai.com",
+        }
+        success_url = f"{URL_BASE}/success?{urllib.parse.urlencode(success_url_query)}"
+        return exchanged_access_token, success_url
+
+    def persist_auth(self, bundle: AuthBundle) -> bool:
+        """Write ``bundle`` to the auth file and return ``write_auth_file``'s result."""
+        auth_json_contents = {
+            "OPENAI_API_KEY": bundle.api_key,
+            "tokens": {
+                "id_token": bundle.token_data.id_token,
+                "access_token": bundle.token_data.access_token,
+                "refresh_token": bundle.token_data.refresh_token,
+                "account_id": bundle.token_data.account_id,
+            },
+            "last_refresh": bundle.last_refresh,
+        }
+        return write_auth_file(auth_json_contents)
+
+
+class OAuthHandler(http.server.BaseHTTPRequestHandler):
+    """Request handler for the local OAuth callback server.
+
+    Routes:
+      * ``GET /auth/callback`` validates ``state``, exchanges the code,
+        persists the tokens and shows the success page.
+      * ``GET /success`` shows the success page.
+      * anything else gets 404.
+
+    Except for a state mismatch, every handled request schedules a server
+    shutdown, because one login attempt is all this server exists for.
+    """
+
+    server: "OAuthHTTPServer"
+
+    def do_GET(self) -> None:
+        """Handle the browser redirect that ends the login flow."""
+        parsed = urllib.parse.urlparse(self.path)
+        path = parsed.path
+        if path == "/success":
+            self._send_html(LOGIN_SUCCESS_HTML)
+            try:
+                self.wfile.flush()
+            except Exception as e:
+                eprint(f"Failed to flush response: {e}")
+            self._shutdown_after_delay(2.0)
+            return
+
+        if path != "/auth/callback":
+            self.send_error(404, "Not Found")
+            self._shutdown()
+            return
+
+        params = urllib.parse.parse_qs(parsed.query)
+
+        # The authorize URL carried a random per-login `state`; the callback
+        # must echo it. Without this check any page could make the browser hit
+        # this endpoint with an attacker-chosen code and sign the user into
+        # the attacker's account (login CSRF). The server is deliberately NOT
+        # shut down here, so a forged request cannot abort a genuine login
+        # that is still in progress.
+        expected_state = str(self.server.state or "")
+        returned_state = params.get("state", [""])[0]
+        if not expected_state or not secrets.compare_digest(
+            returned_state.encode("utf-8"), expected_state.encode("utf-8")
+        ):
+            self.send_error(400, "State mismatch")
+            return
+
+        code = params.get("code", [None])[0]
+        if not code:
+            self.send_error(400, "Missing auth code")
+            self._shutdown()
+            return
+
+        try:
+            auth_bundle, success_url = self._exchange_code(code)
+        except Exception as exc:
+            self.send_error(500, f"Token exchange failed: {exc}")
+            self._shutdown()
+            return
+
+        # Single source of truth for the auth.json layout lives on the server.
+        if self.server.persist_auth(auth_bundle):
+            self.server.exit_code = 0
+            self._send_html(LOGIN_SUCCESS_HTML)
+        else:
+            self.send_error(500, "Unable to persist auth file")
+        self._shutdown_after_delay(2.0)
+
+    def do_POST(self) -> None:
+        """Reject POST requests with 404 and stop the server."""
+        self.send_error(404, "Not Found")
+        self._shutdown()
+
+    def log_message(self, fmt: str, *args):
+        """Emit the default access log only when the server is verbose."""
+        if getattr(self.server, "verbose", False):
+            super().log_message(fmt, *args)
+
+    def _send_redirect(self, url: str) -> None:
+        """Send a 302 redirect to ``url``."""
+        self.send_response(302)
+        self.send_header("Location", url)
+        self.end_headers()
+
+    def _send_html(self, body: str) -> None:
+        """Send ``body`` as a 200 UTF-8 HTML response with an exact Content-Length."""
+        encoded = body.encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html; charset=utf-8")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+
+    def _shutdown(self) -> None:
+        """Stop the server from a helper thread.
+
+        ``shutdown()`` blocks until the serve loop exits, so calling it on the
+        thread that is handling the request would deadlock.
+        """
+        threading.Thread(target=self.server.shutdown, daemon=True).start()
+
+    def _shutdown_after_delay(self, seconds: float = 2.0) -> None:
+        """Stop the server after ``seconds``, leaving time to deliver the response."""
+        def _later():
+            try:
+                time.sleep(seconds)
+            finally:
+                self._shutdown()
+
+        threading.Thread(target=_later, daemon=True).start()
+
+    def _exchange_code(self, code: str) -> Tuple[AuthBundle, str]:
+        """Delegate the code exchange to the server object."""
+        return self.server.exchange_code(code)
+
+    def _maybe_obtain_api_key(
+        self,
+        token_claims: Dict[str, Any],
+        access_claims: Dict[str, Any],
+        token_data: TokenData,
+    ) -> Tuple[str | None, str | None]:
+        """Handler-side variant of the server's API-key exchange.
+
+        Not called by any method of this class. Unlike the server's version,
+        the success URL built here never contains the access or refresh token.
+
+        Returns:
+            ``(api_key_or_None, success_url)``.
+        """
+        org_id = token_claims.get("organization_id")
+        project_id = token_claims.get("project_id")
+        if not org_id or not project_id:
+            query = {
+                "id_token": token_data.id_token,
+                "needs_setup": "false",
+                "org_id": org_id or "",
+                "project_id": project_id or "",
+                "plan_type": access_claims.get("chatgpt_plan_type"),
+                "platform_url": "https://platform.openai.com",
+            }
+            return None, f"{URL_BASE}/success?{urllib.parse.urlencode(query)}"
+
+        today = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
+        exchange_data = urllib.parse.urlencode(
+            {
+                "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
+                "client_id": self.server.client_id,
+                "requested_token": "openai-api-key",
+                "subject_token": token_data.id_token,
+                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
+                "name": f"ChatMock [auto-generated] ({today})",
+            }
+        ).encode()
+
+        with urllib.request.urlopen(
+            urllib.request.Request(
+                self.server.token_endpoint,
+                data=exchange_data,
+                method="POST",
+                headers={"Content-Type": "application/x-www-form-urlencoded"},
+            ),
+            context=_SSL_CONTEXT,
+        ) as resp:
+            exchange_payload = json.loads(resp.read().decode())
+            exchanged_access_token = exchange_payload.get("access_token")
+
+        chatgpt_plan_type = access_claims.get("chatgpt_plan_type")
+        success_url_query = {
+            "id_token": token_data.id_token,
+            "needs_setup": "false",
+            "org_id": org_id,
+            "project_id": project_id,
+            "plan_type": chatgpt_plan_type,
+            "platform_url": "https://platform.openai.com",
+        }
+        success_url = f"{URL_BASE}/success?{urllib.parse.urlencode(success_url_query)}"
+        return exchanged_access_token, success_url
diff --git a/chatmock/prompt.md b/chatmock/prompt.md
new file mode 100644
index 0000000000000000000000000000000000000000..7783dbd83f8e4f38317c73a973c43782079ea766
--- /dev/null
+++ b/chatmock/prompt.md
@@ -0,0 +1 @@
+../prompt.md
\ No newline at end of file
diff --git a/chatmock/prompt_gpt5_codex.md b/chatmock/prompt_gpt5_codex.md
new file mode 100644
index 0000000000000000000000000000000000000000..04aa304f44abe0603c22f0dc3d4dbf9e9e023179
--- /dev/null
+++ b/chatmock/prompt_gpt5_codex.md
@@ -0,0 +1 @@
+../prompt_gpt5_codex.md
\ No newline at end of file
diff --git a/chatmock/reasoning.py b/chatmock/reasoning.py
new file mode 100644
index 0000000000000000000000000000000000000000..37c276c3f96174d777aa1d2aae053c6376cfb92b
--- /dev/null
+++ b/chatmock/reasoning.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+from typing import Any, Dict
+
+from .model_registry import DEFAULT_REASONING_EFFORTS, allowed_efforts_for_model, extract_reasoning_from_model_name
+
+
+def build_reasoning_param(
+    base_effort: str = "medium",
+    base_summary: str = "auto",
+    overrides: Dict[str, Any] | None = None,
+    *,
+    allowed_efforts: frozenset[str] | None = None,
+) -> Dict[str, Any]:
+    """Build the upstream ``reasoning`` dict from defaults plus request overrides.
+
+    String ``effort``/``summary`` values in ``overrides`` win when valid; invalid
+    results fall back to "medium"/"auto". Non-string override values (JSON
+    ``null`` in particular) are ignored rather than stringified, so ``None`` is
+    never mistaken for the literal option "none". A "none" summary is omitted.
+    """
+    valid_efforts = allowed_efforts or DEFAULT_REASONING_EFFORTS
+    valid_summaries = {"auto", "concise", "detailed", "none"}
+    effort = (base_effort or "").strip().lower()
+    summary = (base_summary or "").strip().lower()
+    if isinstance(overrides, dict):
+        o_eff, o_sum = overrides.get("effort"), overrides.get("summary")
+        if isinstance(o_eff, str) and o_eff.strip().lower() in valid_efforts:
+            effort = o_eff.strip().lower()
+        if isinstance(o_sum, str) and o_sum.strip().lower() in valid_summaries:
+            summary = o_sum.strip().lower()
+    if effort not in valid_efforts:
+        effort = "medium"
+    if summary not in valid_summaries:
+        summary = "auto"
+    return {"effort": effort} if summary == "none" else {"effort": effort, "summary": summary}
+
+
+def apply_reasoning_to_message(
+    message: Dict[str, Any],
+    reasoning_summary_text: str,
+    reasoning_full_text: str,
+    compat: str,
+) -> Dict[str, Any]:
+    """Attach reasoning text to ``message`` in the shape selected by ``compat``.
+
+    * "o3": summary and full text joined by a blank line, stored under
+      ``message["reasoning"]["content"]`` as a single text part.
+    * "legacy" / "current": summary goes to ``message["reasoning_summary"]``
+      and full text to ``message["reasoning"]`` (each only when truthy).
+    * anything else (default "think-tags"): the joined text is wrapped in
+      ``<think>...</think>`` and prepended to a string ``content``; when
+      ``content`` is a non-string truthy value the message is left untouched.
+
+    The message is mutated in place and also returned.
+    """
+    try:
+        mode = (compat or "think-tags").strip().lower()
+    except Exception:
+        mode = "think-tags"
+
+    if mode in ("legacy", "current"):
+        if reasoning_summary_text:
+            message["reasoning_summary"] = reasoning_summary_text
+        if reasoning_full_text:
+            message["reasoning"] = reasoning_full_text
+        return message
+
+    candidates = (reasoning_summary_text, reasoning_full_text)
+    joined = "\n\n".join(t for t in candidates if isinstance(t, str) and t.strip())
+    if not joined:
+        return message
+    if mode == "o3":
+        message["reasoning"] = {"content": [{"type": "text", "text": joined}]}
+    else:
+        body = message.get("content") or ""
+        if isinstance(body, str):
+            message["content"] = f"<think>{joined}</think>{body}"
+    return message
diff --git a/chatmock/responses_api.py b/chatmock/responses_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..51bda2ac5203dbec8f36cd7ba5d03204085b310b
--- /dev/null
+++ b/chatmock/responses_api.py
@@ -0,0 +1,243 @@
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, Iterator, List
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import ServiceTierResolution, resolve_service_tier
+from .model_registry import (
+    allowed_efforts_for_model,
+    extract_reasoning_from_model_name,
+    normalize_model_name,
+    uses_codex_instructions,
+)
+from .reasoning import build_reasoning_param
+from .session import ensure_session_id
+
+
+@dataclass(frozen=True)  # NOTE(review): frozen also rejects Python-level attribute assignment (e.g. __traceback__) — confirm nothing needs it
+class ResponsesRequestError(Exception):  # raised by normalize_responses_payload when a request is rejected
+    message: str  # text returned by __str__
+    status_code: int = 400  # NOTE(review): presumably the HTTP status a route should answer with — confirm
+    code: str | None = None  # optional error code; left at None by the raise site in this module
+
+    def __str__(self) -> str:
+        return self.message
+
+
+@dataclass(frozen=True)
+class NormalizedResponsesRequest:  # immutable result bundle returned by normalize_responses_payload
+    payload: Dict[str, Any]  # shallow copy of the client payload with normalized fields applied
+    requested_model: str | None  # client-supplied model string, or None when absent / not a string
+    normalized_model: str  # value returned by normalize_model_name
+    session_id: str  # value returned by ensure_session_id; also the default prompt_cache_key
+    service_tier_resolution: ServiceTierResolution  # value returned by resolve_service_tier
+
+
+def instructions_for_model(config: Dict[str, Any], model: str) -> str:
+    """Return the Codex prompt if ``uses_codex_instructions(model)`` and it is non-blank, else the base prompt."""
+    default_prompt = config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
+    if not uses_codex_instructions(model):
+        return default_prompt
+    codex_prompt = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
+    return codex_prompt if isinstance(codex_prompt, str) and codex_prompt.strip() else default_prompt
+
+
+def extract_client_session_id(headers: Any) -> str | None:  # first truthy session header value, else None
+    try:
+        return next(filter(None, map(headers.get, ("X-Session-Id", "session_id"))), None)
+    except Exception:  # e.g. headers is None or has no usable .get()
+        return None
+
+
+def _input_items_for_session(raw_input: Any) -> List[Dict[str, Any]]:
+    """Coerce a Responses ``input`` value into a list of dict items.
+
+    Lists keep only their dict members, a lone dict is wrapped, a non-blank
+    string becomes one user message, and anything else yields ``[]``.
+    """
+    if isinstance(raw_input, dict):
+        return [raw_input]
+    if isinstance(raw_input, list):
+        return list(filter(lambda entry: isinstance(entry, dict), raw_input))
+    if not isinstance(raw_input, str) or not raw_input.strip():
+        return []
+    text_part = {"type": "input_text", "text": raw_input}
+    return [{"type": "message", "role": "user", "content": [text_part]}]
+
+
+def canonicalize_responses_input(raw_input: Any) -> Any:
+    """Normalize a Responses ``input`` value to list-of-dict form when possible.
+
+    Lists, dicts and strings go through ``_input_items_for_session`` (a blank
+    string therefore becomes ``[]``); every other type is returned unchanged.
+    """
+    recognised = isinstance(raw_input, (list, dict, str))
+    return _input_items_for_session(raw_input) if recognised else raw_input
+
+
+def normalize_responses_payload(
+    payload: Dict[str, Any],
+    *,
+    config: Dict[str, Any],
+    client_session_id: str | None = None,
+) -> NormalizedResponsesRequest:
+    """Normalize a client Responses payload (on a shallow copy) for the upstream call.
+
+    Fills defaults for model, store, instructions, reasoning, include, tools,
+    service_tier and prompt_cache_key; drops max_output_tokens and fast_mode.
+    Raises ResponsesRequestError when service-tier resolution reports an error.
+    """
+    raw_model = payload.get("model")
+    requested_model = raw_model if isinstance(raw_model, str) else None
+    normalized_model = normalize_model_name(requested_model, config.get("DEBUG_MODEL"))
+    normalized = dict(payload)
+    normalized["model"] = normalized_model
+    normalized.pop("max_output_tokens", None)
+    if "input" in normalized:
+        normalized["input"] = canonicalize_responses_input(normalized.get("input"))
+    normalized.setdefault("store", False)
+
+    instructions = normalized.get("instructions")
+    if not (isinstance(instructions, str) and instructions.strip()):
+        instructions = normalized["instructions"] = instructions_for_model(config, normalized_model)
+
+    # An explicit reasoning dict in the request takes precedence over model-name hints.
+    reasoning_overrides = normalized.get("reasoning")
+    if not isinstance(reasoning_overrides, dict):
+        reasoning_overrides = extract_reasoning_from_model_name(requested_model)
+    normalized["reasoning"] = build_reasoning_param(
+        config.get("REASONING_EFFORT", "medium"),
+        config.get("REASONING_SUMMARY", "auto"),
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(normalized_model),
+    )
+
+    wanted = normalized.get("include")
+    include_list = [entry for entry in wanted if isinstance(entry, str)] if isinstance(wanted, list) else []
+    if "reasoning.encrypted_content" not in include_list:
+        include_list.append("reasoning.encrypted_content")
+    normalized["include"] = include_list
+
+    tools = normalized.get("tools")
+    has_tools = isinstance(tools, list) and bool(tools)
+    if not has_tools and bool(config.get("DEFAULT_WEB_SEARCH")):
+        tool_choice = normalized.get("tool_choice")
+        if not isinstance(tool_choice, str) or tool_choice.strip().lower() != "none":
+            normalized["tools"] = [{"type": "web_search"}]
+
+    service_tier_resolution = resolve_service_tier(
+        normalized_model,
+        request_fast_mode=normalized.get("fast_mode"),
+        request_service_tier=normalized.get("service_tier"),
+        server_fast_mode=bool(config.get("FAST_MODE")),
+    )
+    if service_tier_resolution.error_message:
+        raise ResponsesRequestError(service_tier_resolution.error_message)
+    if service_tier_resolution.service_tier is not None:
+        normalized["service_tier"] = service_tier_resolution.service_tier
+    else:
+        normalized.pop("service_tier", None)
+    normalized.pop("fast_mode", None)
+
+    session_items = _input_items_for_session(normalized.get("input"))
+    session_id = ensure_session_id(instructions, session_items, client_session_id)
+    cache_key = normalized.get("prompt_cache_key")
+    if not (isinstance(cache_key, str) and cache_key.strip()):
+        normalized["prompt_cache_key"] = session_id
+
+    return NormalizedResponsesRequest(
+        payload=normalized,
+        requested_model=requested_model,
+        normalized_model=normalized_model,
+        session_id=session_id,
+        service_tier_resolution=service_tier_resolution,
+    )
+
+
+def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]:
+    """Yield each JSON-object ``data:`` payload from an SSE response until ``[DONE]``.
+
+    The space after ``data:`` is optional per the SSE format; anything unparsable is skipped.
+    """
+    for raw in upstream.iter_lines(decode_unicode=False):
+        line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw
+        if not line or not line.startswith("data:"):
+            continue
+        data = line[len("data:") :].strip()
+        if data == "[DONE]":
+            break
+        try:
+            evt = json.loads(data)
+        except Exception:
+            continue
+        if isinstance(evt, dict):
+            yield evt
+
+
+def aggregate_response_from_sse(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]:
+    """Consume an SSE stream and return ``(last response object, error wrapper)``.
+
+    Stops at ``response.completed`` / ``response.failed``; ``upstream`` is always closed.
+    """
+    latest: Dict[str, Any] | None = None
+    failure: Dict[str, Any] | None = None
+    try:
+        for evt in iter_sse_event_payloads(upstream):
+            if callable(on_event):
+                try:
+                    on_event(evt)
+                except Exception:
+                    pass  # observer failures must not interrupt aggregation
+            response = evt.get("response")
+            latest = response if isinstance(response, dict) else latest
+            kind = evt.get("type")
+            if kind == "response.failed":
+                detail = response.get("error") if isinstance(response, dict) else None
+                failure = {"error": detail if isinstance(detail, dict) else {"message": "response.failed"}}
+            if kind in ("response.failed", "response.completed"):
+                break
+    finally:
+        upstream.close()
+    return latest, failure
+
+
+def stream_upstream_bytes(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> Iterable[bytes]:
+    """Relay upstream chunks unchanged, optionally reporting parsed SSE events.
+
+    A callable ``on_event`` receives every JSON-object ``data:`` line (space after
+    the colon optional, per SSE); its errors are ignored. Closes ``upstream`` at the end.
+    """
+    buffer = b""
+    try:
+        for chunk in upstream.iter_content(chunk_size=None):
+            if not chunk:
+                continue
+            if callable(on_event):
+                buffer += chunk if isinstance(chunk, bytes) else str(chunk).encode("utf-8", errors="ignore")
+                *lines, buffer = buffer.split(b"\n")  # the last piece is an incomplete line
+                for line in lines:
+                    if not line.startswith(b"data:"):
+                        continue
+                    data = line[len(b"data:") :].strip()
+                    try:
+                        evt = json.loads(data.decode("utf-8", errors="ignore"))
+                    except Exception:
+                        evt = None  # blank data, "[DONE]" and malformed JSON all land here
+                    if isinstance(evt, dict):
+                        try:
+                            on_event(evt)
+                        except Exception:
+                            pass
+            yield chunk
+    finally:
+        upstream.close()
diff --git a/chatmock/routes_custom.py b/chatmock/routes_custom.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e8a2a31f47e9a4dfd9e434c79afeee978f978b6
--- /dev/null
+++ b/chatmock/routes_custom.py
@@ -0,0 +1,108 @@
+from __future__ import annotations
+
+import json
+from flask import Blueprint, jsonify, request, current_app
+from .routes_openai import chat_completions
+
+custom_bp = Blueprint("custom", __name__)  # blueprint holding the prompt-in / plain-text-out route below
+
+@custom_bp.route("/api", methods=["POST"])
+@custom_bp.route("/model/api", methods=["POST"])
+@custom_bp.route("/<model_name>/api", methods=["POST"])
+def custom_api(model_name: str | None = None):
+    """Answer a ``{"prompt": ...}`` POST with the model's reply as one line of plain text.
+
+    Responds ``{"status": "success", "text": ...}``, or ``{"status": "error",
+    "message": ...}`` with 400 (bad body), 502 (upstream not started) or the
+    upstream's own status code.
+    """
+    import re
+
+    from .upstream import normalize_model_name, start_upstream_request
+    from .responses_api import instructions_for_model
+    from .reasoning import build_reasoning_param, allowed_efforts_for_model
+
+    raw = request.get_data(as_text=True) or ""
+    try:
+        payload = json.loads(raw)
+    except Exception:
+        return jsonify({"status": "error", "message": "Invalid JSON"}), 400
+
+    # Valid JSON that is not an object (list, string, number) has no "prompt";
+    # treat it like a missing prompt instead of failing on .get().
+    prompt = payload.get("prompt") if isinstance(payload, dict) else None
+    if not prompt:
+        return jsonify({"status": "error", "message": "Missing prompt"}), 400
+
+    # Use model_name from URL if provided, otherwise default to config or None
+    model = normalize_model_name(model_name, current_app.config.get("DEBUG_MODEL"))
+
+    input_items = [
+        {"type": "message", "role": "user", "content": [{"type": "input_text", "text": prompt}]}
+    ]
+    reasoning_param = build_reasoning_param(
+        current_app.config.get("REASONING_EFFORT", "medium"),
+        current_app.config.get("REASONING_SUMMARY", "auto"),
+        None,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
+    upstream, error_resp = start_upstream_request(
+        model,
+        input_items,
+        instructions=instructions_for_model(current_app.config, model),
+        reasoning_param=reasoning_param,
+    )
+    if error_resp:
+        return jsonify({"status": "error", "message": "Upstream error"}), 502
+
+    if upstream.status_code >= 400:
+        status_code = upstream.status_code
+        upstream.close()  # release the connection before reporting the failure
+        return jsonify({"status": "error", "message": f"Upstream returned {status_code}"}), status_code
+
+    full_text = ""
+    try:
+        for raw_line in upstream.iter_lines(decode_unicode=False):
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8", errors="ignore")
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue  # an empty data field is not end-of-stream
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+                if evt.get("type") == "response.output_text.delta":
+                    full_text += evt.get("delta") or ""
+                elif evt.get("type") == "response.completed":
+                    break
+            except Exception:
+                continue
+    finally:
+        upstream.close()
+
+    # 1. Map common typographic characters to ASCII.
+    replacements = {
+        "\u2019": "'", "\u2018": "'", "\u201d": '"', "\u201c": '"',
+        "\u2014": "-", "\u2013": "-", "\u2022": "*", "\u2026": "...",
+    }
+    clean_text = full_text.strip()
+    for old, new in replacements.items():
+        clean_text = clean_text.replace(old, new)
+
+    # 2. Strip basic markdown (bold, italics)
+    clean_text = re.sub(r'\*\*(.*?)\*\*', r'\1', clean_text)  # bold
+    clean_text = re.sub(r'\*(.*?)\*', r'\1', clean_text)  # italics
+    clean_text = re.sub(r'__(.*?)__', r'\1', clean_text)  # bold
+    clean_text = re.sub(r'_(.*?)_', r'\1', clean_text)  # italics
+
+    # 3. Collapse newlines and runs of whitespace into single spaces, then trim.
+    clean_text = re.sub(r'\s+', ' ', clean_text).strip()
+
+    return jsonify({
+        "status": "success",
+        "text": clean_text
+    })
diff --git a/chatmock/routes_ollama.py b/chatmock/routes_ollama.py
new file mode 100644
index 0000000000000000000000000000000000000000..5da18d0e4156f77092fcf287c8be6c9208f81096
--- /dev/null
+++ b/chatmock/routes_ollama.py
@@ -0,0 +1,585 @@
+from __future__ import annotations
+
+import json
+import datetime
+import time
+from typing import Any, Dict, List
+
+from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
+from .limits import record_rate_limits_from_response
+from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import instructions_for_model
+from .reasoning import (
+    allowed_efforts_for_model,
+    build_reasoning_param,
+    extract_reasoning_from_model_name,
+)
+from .transform import convert_ollama_messages, normalize_ollama_tools
+from .upstream import normalize_model_name, start_upstream_request
+from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_to_responses
+
+
+ollama_bp = Blueprint("ollama", __name__)  # blueprint holding the /api/* routes defined in this module
+
+
+def _log_json(prefix: str, payload: Any) -> None:
+    """Print ``prefix`` then ``payload`` as indented JSON, else its plain string form; never raises."""
+    for render in (lambda: json.dumps(payload, indent=2, ensure_ascii=False), lambda: payload):
+        try:
+            print(f"{prefix}\n{render()}")
+            return
+        except Exception:
+            continue
+
+
+def _wrap_stream_logging(label: str, iterator, enabled: bool):
+    """Return ``iterator`` unchanged, or (when ``enabled``) a generator that also prints each chunk.
+
+    Bytes are decoded as UTF-8 with replacement; printing errors are ignored.
+    """
+    if not enabled:
+        return iterator
+
+    def _echoing():
+        for chunk in iterator:
+            try:
+                is_binary = isinstance(chunk, (bytes, bytearray))
+                text = chunk.decode("utf-8", errors="replace") if is_binary else str(chunk)
+                print(f"{label}\n{text}")
+            except Exception:
+                pass
+            yield chunk
+
+    return _echoing()
+
+
+@ollama_bp.route("/api/version", methods=["GET"])
+def ollama_version() -> Response:
+    """Report the configured OLLAMA_VERSION (default "0.12.10") as ``{"version": ...}``."""
+    settings = current_app.config
+    if settings.get("VERBOSE"):
+        print("IN GET /api/version")
+    configured = settings.get("OLLAMA_VERSION", "0.12.10")
+    payload = {"version": configured if isinstance(configured, str) and configured.strip() else "0.12.10"}
+    resp = make_response(jsonify(payload), 200)
+    for header, value in build_cors_headers().items():
+        resp.headers.setdefault(header, value)
+    if settings.get("VERBOSE"):
+        _log_json("OUT GET /api/version", payload)
+    return resp
+
+
+def _instructions_for_model(model: str) -> str:  # instructions_for_model bound to the current Flask app config
+    return instructions_for_model(current_app.config, model)
+
+
+_OLLAMA_FAKE_EVAL = {  # fixed placeholder metrics; NOTE(review): presumably merged into final chat responses — confirm
+    "total_duration": 8497226791,
+    "load_duration": 1747193958,
+    "prompt_eval_count": 24,
+    "prompt_eval_duration": 269219750,
+    "eval_count": 247,
+    "eval_duration": 6413802458,
+}
+
+
+@ollama_bp.route("/api/tags", methods=["GET"])
+def ollama_tags() -> Response:
+    """List the public models as ``{"models": [...]}`` with fixed placeholder metadata per entry."""
+    verbose = bool(current_app.config.get("VERBOSE"))
+    if verbose:
+        print("IN GET /api/tags")
+    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
+    models = [
+        {
+            "name": model_id,
+            "model": model_id,
+            "modified_at": "2023-10-01T00:00:00Z",
+            "size": 815319791,
+            "digest": "8648f39daa8fbf5b18c7b4e6a8fb4990c692751d49917417b8842ca5758e7ffc",
+            "details": {
+                "parent_model": "",
+                "format": "gguf",
+                "family": "llama",
+                "families": ["llama"],
+                "parameter_size": "8.0B",
+                "quantization_level": "Q4_0",
+            },
+        }
+        for model_id in list_public_models(expose_reasoning_models=expose_variants)
+    ]
+    payload = {"models": models}
+    resp = make_response(jsonify(payload), 200)
+    for header, value in build_cors_headers().items():
+        resp.headers.setdefault(header, value)
+    if verbose:
+        _log_json("OUT GET /api/tags", payload)
+    return resp
+
+
+@ollama_bp.route("/api/show", methods=["POST"])
+def ollama_show() -> Response:
+    """Serve a fixed Ollama ``/api/show`` description for any non-blank ``model`` name.
+
+    Responds 400 "Model not found" otherwise; a non-object JSON body counts as empty.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    raw_body = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /api/show\n" + raw_body)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw_body) if raw_body else (request.get_json(silent=True) or {})
+    except Exception:
+        payload = request.get_json(silent=True) or {}
+    model = payload.get("model") if isinstance(payload, dict) else None  # lists/strings have no .get
+    if not isinstance(model, str) or not model.strip():
+        err = {"error": "Model not found"}
+        if verbose:
+            _log_json("OUT POST /api/show", err)
+        return jsonify(err), 400
+    v1_show_response = {
+        "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /models/blobs/sha256:placeholder\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 100000\nPARAMETER stop \"</s>\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
+        "parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"",
+        "template": "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>",
+        "details": {
+            "parent_model": "",
+            "format": "gguf",
+            "family": "llama",
+            "families": ["llama"],
+            "parameter_size": "8.0B",
+            "quantization_level": "Q4_0",
+        },
+        "model_info": {"general.architecture": "llama", "general.file_type": 2, "llama.context_length": 2000000},
+        "capabilities": ["completion", "vision", "tools", "thinking"],
+    }
+    if verbose:
+        _log_json("OUT POST /api/show", v1_show_response)
+    resp = make_response(jsonify(v1_show_response), 200)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@ollama_bp.route("/api/chat", methods=["POST"])
+def ollama_chat() -> Response:
+    verbose = bool(current_app.config.get("VERBOSE"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+    reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
+
+    try:
+        raw = request.get_data(cache=True, as_text=True) or ""
+        if verbose:
+            print("IN POST /api/chat\n" + (raw if isinstance(raw, str) else ""))
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": "Invalid JSON body"}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+
+    model = payload.get("model")
+    raw_messages = payload.get("messages")
+    messages = convert_ollama_messages(
+        raw_messages, payload.get("images") if isinstance(payload.get("images"), list) else None
+    )
+    if isinstance(messages, list):
+        sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
+        if isinstance(sys_idx, int):
+            sys_msg = messages.pop(sys_idx)
+            content = sys_msg.get("content") if isinstance(sys_msg, dict) else ""
+            messages.insert(0, {"role": "user", "content": content})
+    stream_req = payload.get("stream")
+    if stream_req is None:
+        stream_req = True
+    stream_req = bool(stream_req)
+    tools_req = payload.get("tools") if isinstance(payload.get("tools"), list) else []
+    tools_responses = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
+    tool_choice = payload.get("tool_choice", "auto")
+    parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+
+    # Passthrough Responses API tools (web_search) via ChatMock extension fields
+    extra_tools: List[Dict[str, Any]] = []
+    had_responses_tools = False
+    rt_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+    if isinstance(rt_payload, list):
+        for _t in rt_payload:
+            if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+                continue
+            if _t.get("type") not in ("web_search", "web_search_preview"):
+                err = {"error": "Only web_search/web_search_preview are supported in responses_tools"}
+                if verbose:
+                    _log_json("OUT POST /api/chat", err)
+                return jsonify(err), 400
+            extra_tools.append(_t)
+        if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+            rtc = payload.get("responses_tool_choice")
+            if not (isinstance(rtc, str) and rtc == "none"):
+                extra_tools = [{"type": "web_search"}]
+        if extra_tools:
+            import json as _json
+            MAX_TOOLS_BYTES = 32768
+            try:
+                size = len(_json.dumps(extra_tools))
+            except Exception:
+                size = 0
+            if size > MAX_TOOLS_BYTES:
+                err = {"error": "responses_tools too large"}
+                if verbose:
+                    _log_json("OUT POST /api/chat", err)
+                return jsonify(err), 400
+            had_responses_tools = True
+            tools_responses = (tools_responses or []) + extra_tools
+
+    rtc = payload.get("responses_tool_choice")
+    if isinstance(rtc, str) and rtc in ("auto", "none"):
+        tool_choice = rtc
+
+    if not isinstance(model, str) or not isinstance(messages, list) or not messages:
+        err = {"error": "Invalid request format"}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+
+    input_items = convert_chat_messages_to_responses_input(messages)
+
+    model_reasoning = extract_reasoning_from_model_name(model)
+    normalized_model = normalize_model_name(model, current_app.config.get("DEBUG_MODEL"))
+    service_tier_resolution = resolve_service_tier(
+        normalized_model,
+        request_fast_mode=payload.get("fast_mode"),
+        request_service_tier=payload.get("service_tier"),
+        server_fast_mode=bool(current_app.config.get("FAST_MODE")),
+    )
+    if service_tier_resolution.warning_message and verbose:
+        print(f"[FastMode] {service_tier_resolution.warning_message}")
+    if service_tier_resolution.error_message:
+        err = {"error": service_tier_resolution.error_message}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+    upstream, error_resp = start_upstream_request(
+        normalized_model,
+        input_items,
+        instructions=_instructions_for_model(normalized_model),
+        tools=tools_responses,
+        tool_choice=tool_choice,
+        parallel_tool_calls=parallel_tool_calls,
+        reasoning_param=build_reasoning_param(
+            reasoning_effort,
+            reasoning_summary,
+            model_reasoning,
+            allowed_efforts=allowed_efforts_for_model(model),
+        ),
+        service_tier=service_tier_resolution.service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /api/chat", parsed)
+            except Exception:
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    if upstream.status_code >= 400:
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        if had_responses_tools:
+            if verbose:
+                print("[Passthrough] Upstream rejected tools; retrying without extras (args redacted)")
+            base_tools_only = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
+            safe_choice = payload.get("tool_choice", "auto")
+            upstream2, err2 = start_upstream_request(
+                normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
+                input_items,
+                instructions=BASE_INSTRUCTIONS,
+                tools=base_tools_only,
+                tool_choice=safe_choice,
+                parallel_tool_calls=parallel_tool_calls,
+                reasoning_param=build_reasoning_param(
+                    reasoning_effort,
+                    reasoning_summary,
+                    model_reasoning,
+                    allowed_efforts=allowed_efforts_for_model(model),
+                ),
+                service_tier=service_tier_resolution.service_tier,
+            )
+            record_rate_limits_from_response(upstream2)
+            if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+                upstream = upstream2
+            else:
+                err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}
+                if verbose:
+                    _log_json("OUT POST /api/chat", err)
+                return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
+        else:
+            if verbose:
+                print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
+            err = {"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}
+            if verbose:
+                _log_json("OUT POST /api/chat", err)
+            return jsonify(err), upstream.status_code
+
+    created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
+    model_out = model if isinstance(model, str) and model.strip() else normalized_model
+
+    if stream_req:
+        def _gen():
+            compat = (current_app.config.get("REASONING_COMPAT", "think-tags") or "think-tags").strip().lower()
+            think_open = False
+            think_closed = False
+            saw_any_summary = False
+            pending_summary_paragraph = False
+            full_parts: List[str] = []
+            try:
+                for raw_line in upstream.iter_lines(decode_unicode=False):
+                    if not raw_line:
+                        continue
+                    line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+                    if not line.startswith("data: "):
+                        continue
+                    data = line[len("data: "):].strip()
+                    if not data:
+                        continue
+                    if data == "[DONE]":
+                        break
+                    try:
+                        evt = json.loads(data)
+                    except Exception:
+                        continue
+                    kind = evt.get("type")
+                    if kind == "response.reasoning_summary_part.added":
+                        if compat in ("think-tags", "o3"):
+                            if saw_any_summary:
+                                pending_summary_paragraph = True
+                            else:
+                                saw_any_summary = True
+                    elif kind in ("response.reasoning_summary_text.delta", "response.reasoning_text.delta"):
+                        delta_txt = evt.get("delta") or ""
+                        if compat == "o3":
+                            if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "\n"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append("\n")
+                                pending_summary_paragraph = False
+                            if delta_txt:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": delta_txt},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append(delta_txt)
+                        elif compat == "think-tags":
+                            if not think_open and not think_closed:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "<think>"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append("<think>")
+                                think_open = True
+                            if think_open and not think_closed:
+                                if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": "\n"},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                                    full_parts.append("\n")
+                                    pending_summary_paragraph = False
+                                if delta_txt:
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": delta_txt},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                                    full_parts.append(delta_txt)
+                        else:
+                            pass
+                    elif kind == "response.output_text.delta":
+                        delta = evt.get("delta") or ""
+                        if compat == "think-tags" and think_open and not think_closed:
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": "</think>"},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                            full_parts.append("</think>")
+                            think_open = False
+                            think_closed = True
+                        if delta:
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": delta},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                            full_parts.append(delta)
+                    elif kind == "response.completed":
+                        break
+            finally:
+                upstream.close()
+                if compat == "think-tags" and think_open and not think_closed:
+                    yield (
+                        json.dumps(
+                            {
+                                "model": model_out,
+                                "created_at": created_at,
+                                "message": {"role": "assistant", "content": "</think>"},
+                                "done": False,
+                            }
+                        )
+                        + "\n"
+                    )
+                    full_parts.append("</think>")
+                done_obj = {
+                    "model": model_out,
+                    "created_at": created_at,
+                    "message": {"role": "assistant", "content": ""},
+                    "done": True,
+                }
+                done_obj.update(_OLLAMA_FAKE_EVAL)
+                yield json.dumps(done_obj) + "\n"
+        if verbose:
+            print("OUT POST /api/chat (streaming response)")
+        stream_iter = stream_with_context(_gen())
+        stream_iter = _wrap_stream_logging("STREAM OUT /api/chat", stream_iter, verbose)
+        resp = current_app.response_class(
+            stream_iter,
+            status=200,
+            mimetype="application/x-ndjson",
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    full_text = ""
+    reasoning_summary_text = ""
+    reasoning_full_text = ""
+    tool_calls: List[Dict[str, Any]] = []
+    try:
+        for raw in upstream.iter_lines(decode_unicode=False):
+            if not raw:
+                continue
+            line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            kind = evt.get("type")
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_summary_text.delta":
+                reasoning_summary_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_text.delta":
+                reasoning_full_text += evt.get("delta") or ""
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and item.get("type") == "function_call":
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ""
+                    args = item.get("arguments") or ""
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        tool_calls.append(
+                            {
+                                "id": call_id,
+                                "type": "function",
+                                "function": {"name": name, "arguments": args},
+                            }
+                        )
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+
+    if (current_app.config.get("REASONING_COMPAT", "think-tags") or "think-tags").strip().lower() == "think-tags":
+        rtxt_parts = []
+        if isinstance(reasoning_summary_text, str) and reasoning_summary_text.strip():
+            rtxt_parts.append(reasoning_summary_text)
+        if isinstance(reasoning_full_text, str) and reasoning_full_text.strip():
+            rtxt_parts.append(reasoning_full_text)
+        rtxt = "\n\n".join([p for p in rtxt_parts if p])
+        if rtxt:
+            full_text = f"<think>{rtxt}</think>" + (full_text or "")
+
+    out_json = {
+        "model": normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
+        "created_at": created_at,
+        "message": {"role": "assistant", "content": full_text, **({"tool_calls": tool_calls} if tool_calls else {})},
+        "done": True,
+        "done_reason": "stop",
+    }
+    out_json.update(_OLLAMA_FAKE_EVAL)
+    if verbose:
+        _log_json("OUT POST /api/chat", out_json)
+    resp = make_response(jsonify(out_json), 200)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
diff --git a/chatmock/routes_openai.py b/chatmock/routes_openai.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb378425ecebf4e71a51d21bdbfb4b0527f3fc1c
--- /dev/null
+++ b/chatmock/routes_openai.py
@@ -0,0 +1,738 @@
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Dict, List
+
+from flask import Blueprint, Response, current_app, jsonify, make_response, request
+
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
+from .limits import record_rate_limits_from_response
+from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import (
+    ResponsesRequestError,
+    aggregate_response_from_sse,
+    extract_client_session_id,
+    instructions_for_model,
+    normalize_responses_payload,
+    stream_upstream_bytes,
+)
+from .reasoning import (
+    allowed_efforts_for_model,
+    apply_reasoning_to_message,
+    build_reasoning_param,
+    extract_reasoning_from_model_name,
+)
+from .session import (
+    clear_responses_reuse_state,
+    note_responses_final_response,
+    note_responses_stream_event,
+    prepare_responses_request_for_session,
+)
+from .upstream import normalize_model_name, start_upstream_raw_request, start_upstream_request
+from .utils import (
+    convert_chat_messages_to_responses_input,
+    convert_tools_chat_to_responses,
+    sse_translate_chat,
+    sse_translate_text,
+)
+
+
+# Blueprint on which this module registers its OpenAI-compatible routes
+# (e.g. POST /v1/chat/completions and POST /v1/completions).
+openai_bp = Blueprint("openai", __name__)
+
+
+def _log_json(prefix: str, payload: Any) -> None:
+    """Print ``prefix`` on one line followed by ``payload`` as indented JSON.
+
+    Logging is best-effort. If the payload cannot be serialised (or printing
+    the JSON form fails) the payload's plain string form is printed instead,
+    and if that fails too the error is swallowed so logging never breaks a
+    request.
+    """
+    try:
+        pretty = json.dumps(payload, indent=2, ensure_ascii=False)
+        print(f"{prefix}\n{pretty}")
+        return
+    except Exception:
+        # Fall through to the plain-text fallback below.
+        pass
+    try:
+        print(f"{prefix}\n{payload}")
+    except Exception:
+        pass
+
+
+def _wrap_stream_logging(label: str, iterator, enabled: bool):
+    """Optionally echo every chunk of a stream to stdout while passing it on.
+
+    Args:
+        label: Line printed before each chunk's text.
+        iterator: Iterable of chunks (bytes/bytearray or anything ``str()``-able).
+        enabled: When falsy, ``iterator`` is returned untouched.
+
+    Returns:
+        ``iterator`` itself when logging is disabled, otherwise a generator
+        that yields the same chunks unchanged after printing each one.
+    """
+
+    def _echo(source):
+        for piece in source:
+            try:
+                if isinstance(piece, (bytes, bytearray)):
+                    rendered = piece.decode("utf-8", errors="replace")
+                else:
+                    rendered = str(piece)
+                print(f"{label}\n{rendered}")
+            except Exception:
+                # Logging must never interrupt the stream.
+                pass
+            yield piece
+
+    return _echo(iterator) if enabled else iterator
+
+
+def _instructions_for_model(model: str) -> str:
+    """Resolve the instructions for ``model`` from the current app's config.
+
+    Thin wrapper that delegates to ``instructions_for_model`` with
+    ``current_app.config``.
+    """
+    app_config = current_app.config
+    return instructions_for_model(app_config, model)
+
+
+def _service_tier_from_payload(
+    model: str,
+    payload: Dict[str, Any],
+    *,
+    verbose: bool = False,
+) -> tuple[str | None, Response | None]:
+    """Resolve the service tier for a request via ``resolve_service_tier``.
+
+    Args:
+        model: Model name passed through to ``resolve_service_tier``.
+        payload: Request body; its ``fast_mode`` and ``service_tier`` keys are read.
+        verbose: When true, warnings and the error body are printed.
+
+    Returns:
+        ``(service_tier, None)`` on success, or ``(None, response)`` where
+        ``response`` is a 400 JSON error carrying CORS headers when the
+        resolution reports an error message.
+    """
+    requested_fast_mode = payload.get("fast_mode")
+    requested_tier = payload.get("service_tier")
+    server_fast_mode = bool(current_app.config.get("FAST_MODE"))
+    resolution = resolve_service_tier(
+        model,
+        request_fast_mode=requested_fast_mode,
+        request_service_tier=requested_tier,
+        server_fast_mode=server_fast_mode,
+    )
+
+    warning = resolution.warning_message
+    if warning and verbose:
+        print(f"[FastMode] {resolution.warning_message}")
+
+    failure = resolution.error_message
+    if not failure:
+        return resolution.service_tier, None
+
+    err = {"error": {"message": failure}}
+    if verbose:
+        _log_json("OUT POST service_tier resolution", err)
+    resp = make_response(jsonify(err), 400)
+    cors = build_cors_headers()
+    for header_name in cors:
+        resp.headers.setdefault(header_name, cors[header_name])
+    return None, resp
+
+
+@openai_bp.route("/v1/chat/completions", methods=["POST"])
+def chat_completions() -> Response:
+    """Serve the OpenAI-compatible ``POST /v1/chat/completions`` endpoint.
+
+    The chat-style request body is converted into an upstream request via
+    ``start_upstream_request``. When ``stream`` is truthy the upstream SSE
+    stream is translated by ``sse_translate_chat`` and returned as
+    ``text/event-stream``; otherwise the SSE events are aggregated into one
+    ``chat.completion`` JSON object.
+
+    Returns:
+        A Flask response. Client errors (invalid JSON, non-object body,
+        non-list ``messages``, unsupported or oversized ``responses_tools``)
+        yield 400; upstream HTTP errors are relayed with the upstream status
+        code; a ``response.failed`` event in the non-streaming path yields
+        502. Error responses built here carry the same CORS headers as
+        successful ones.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+    reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
+
+    def _fail(err: Dict[str, Any], status: int) -> Response:
+        """Log ``err`` when verbose and return it as JSON with CORS headers."""
+        if verbose:
+            _log_json("OUT POST /v1/chat/completions", err)
+        failure = make_response(jsonify(err), status)
+        for k, v in build_cors_headers().items():
+            failure.headers.setdefault(k, v)
+        return failure
+
+    def _upstream_error_message(body: Any) -> Any:
+        """Pull ``error.message`` out of an upstream error body of any shape.
+
+        The upstream body is not guaranteed to be ``{"error": {"message": ...}}``:
+        it may be a list, or ``error`` may be a bare string or null.
+        """
+        error = body.get("error") if isinstance(body, dict) else None
+        if isinstance(error, dict):
+            message = error.get("message")
+            return message if message else "Upstream error"
+        if isinstance(error, str) and error:
+            return error
+        return "Upstream error"
+
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/chat/completions\n" + raw)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        # Second chance: some clients send bodies with raw line breaks inside.
+        try:
+            payload = json.loads(raw.replace("\r", "").replace("\n", ""))
+        except Exception:
+            return _fail({"error": {"message": "Invalid JSON body"}}, 400)
+    if not isinstance(payload, dict):
+        # Valid JSON that is not an object (list, string, number, ...) cannot
+        # be a chat request; reject it instead of failing on ``payload.get``.
+        return _fail({"error": {"message": "Request body must be a JSON object"}}, 400)
+
+    requested_model = payload.get("model")
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
+    messages = payload.get("messages")
+    if messages is None and isinstance(payload.get("prompt"), str):
+        messages = [{"role": "user", "content": payload.get("prompt") or ""}]
+    if messages is None and isinstance(payload.get("input"), str):
+        messages = [{"role": "user", "content": payload.get("input") or ""}]
+    if messages is None:
+        messages = []
+    if not isinstance(messages, list):
+        return _fail({"error": {"message": "Request must include messages: []"}}, 400)
+
+    # The first system message is moved to the front and re-labelled as a
+    # user message before conversion.
+    sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
+    if isinstance(sys_idx, int):
+        sys_msg = messages.pop(sys_idx)
+        content = sys_msg.get("content") if isinstance(sys_msg, dict) else ""
+        messages.insert(0, {"role": "user", "content": content})
+    is_stream = bool(payload.get("stream"))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
+
+    tools_responses = convert_tools_chat_to_responses(payload.get("tools"))
+    tool_choice = payload.get("tool_choice", "auto")
+    parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+    responses_tools_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+    extra_tools: List[Dict[str, Any]] = []
+    had_responses_tools = False
+    for _t in responses_tools_payload:
+        if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+            continue
+        if _t.get("type") not in ("web_search", "web_search_preview"):
+            return _fail(
+                {
+                    "error": {
+                        "message": "Only web_search/web_search_preview are supported in responses_tools",
+                        "code": "RESPONSES_TOOL_UNSUPPORTED",
+                    }
+                },
+                400,
+            )
+        extra_tools.append(_t)
+
+    # With no explicit responses_tools, optionally enable web search by
+    # default unless the client opted out via responses_tool_choice="none".
+    if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+        responses_tool_choice = payload.get("responses_tool_choice")
+        if not (isinstance(responses_tool_choice, str) and responses_tool_choice == "none"):
+            extra_tools = [{"type": "web_search"}]
+
+    if extra_tools:
+        MAX_TOOLS_BYTES = 32768
+        try:
+            size = len(json.dumps(extra_tools))
+        except Exception:
+            size = 0
+        if size > MAX_TOOLS_BYTES:
+            return _fail(
+                {"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}},
+                400,
+            )
+        had_responses_tools = True
+        tools_responses = (tools_responses or []) + extra_tools
+
+    responses_tool_choice = payload.get("responses_tool_choice")
+    if isinstance(responses_tool_choice, str) and responses_tool_choice in ("auto", "none"):
+        tool_choice = responses_tool_choice
+
+    input_items = convert_chat_messages_to_responses_input(messages)
+    if not input_items and isinstance(payload.get("prompt"), str) and payload.get("prompt").strip():
+        input_items = [
+            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": payload.get("prompt")}]}
+        ]
+
+    model_reasoning = extract_reasoning_from_model_name(requested_model)
+    reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
+    service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+    if tier_error is not None:
+        return tier_error
+
+    upstream, error_resp = start_upstream_request(
+        model,
+        input_items,
+        instructions=_instructions_for_model(model),
+        tools=tools_responses,
+        tool_choice=tool_choice,
+        parallel_tool_calls=parallel_tool_calls,
+        reasoning_param=reasoning_param,
+        service_tier=service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/chat/completions", parsed)
+            except Exception:
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    created = int(time.time())
+    if upstream.status_code >= 400:
+        try:
+            raw_body = upstream.content
+            err_body = json.loads(raw_body.decode("utf-8", errors="ignore")) if raw_body else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        if had_responses_tools:
+            if verbose:
+                print("[Passthrough] Upstream rejected tools; retrying without extra tools (args redacted)")
+            base_tools_only = convert_tools_chat_to_responses(payload.get("tools"))
+            safe_choice = payload.get("tool_choice", "auto")
+            # The retry differs from the first attempt only in its tool set, so
+            # it keeps the same model-specific instructions.
+            upstream2, err2 = start_upstream_request(
+                model,
+                input_items,
+                instructions=_instructions_for_model(model),
+                tools=base_tools_only,
+                tool_choice=safe_choice,
+                parallel_tool_calls=parallel_tool_calls,
+                reasoning_param=reasoning_param,
+                service_tier=service_tier,
+            )
+            if upstream2 is not None:
+                record_rate_limits_from_response(upstream2)
+            if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+                upstream = upstream2
+            else:
+                status = upstream2.status_code if upstream2 is not None else upstream.status_code
+                if upstream2 is not None:
+                    # The failed retry body is never read; release it explicitly.
+                    try:
+                        upstream2.close()
+                    except Exception:
+                        pass
+                return _fail(
+                    {
+                        "error": {
+                            "message": _upstream_error_message(err_body),
+                            "code": "RESPONSES_TOOLS_REJECTED",
+                        }
+                    },
+                    status,
+                )
+        else:
+            if verbose:
+                print("Upstream error status=", upstream.status_code)
+            return _fail({"error": {"message": _upstream_error_message(err_body)}}, upstream.status_code)
+
+    if is_stream:
+        if verbose:
+            print("OUT POST /v1/chat/completions (streaming response)")
+        stream_iter = sse_translate_chat(
+            upstream,
+            requested_model or model,
+            created,
+            verbose=verbose_obfuscation,
+            vlog=print if verbose_obfuscation else None,
+            reasoning_compat=reasoning_compat,
+            include_usage=include_usage,
+        )
+        stream_iter = _wrap_stream_logging("STREAM OUT /v1/chat/completions", stream_iter, verbose)
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    full_text = ""
+    reasoning_summary_text = ""
+    reasoning_full_text = ""
+    response_id = "chatcmpl"
+    tool_calls: List[Dict[str, Any]] = []
+    error_message: str | None = None
+    usage_obj: Dict[str, int] | None = None
+
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        """Map an event's ``response.usage`` onto chat-completions usage keys."""
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+
+    try:
+        for raw in upstream.iter_lines(decode_unicode=False):
+            if not raw:
+                continue
+            line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            if not isinstance(evt, dict):
+                # A JSON scalar or array is not an event; skip it instead of
+                # failing on ``evt.get``.
+                continue
+            kind = evt.get("type")
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_summary_text.delta":
+                reasoning_summary_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_text.delta":
+                reasoning_full_text += evt.get("delta") or ""
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and item.get("type") == "function_call":
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ""
+                    args = item.get("arguments") or ""
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        tool_calls.append(
+                            {
+                                "id": call_id,
+                                "type": "function",
+                                "function": {"name": name, "arguments": args},
+                            }
+                        )
+            elif kind == "response.failed":
+                # ``response`` or ``error`` may be null or absent; never let a
+                # malformed failure event raise.
+                failed_resp = evt.get("response")
+                failed_err = failed_resp.get("error") if isinstance(failed_resp, dict) else None
+                failed_msg = failed_err.get("message") if isinstance(failed_err, dict) else None
+                error_message = failed_msg if isinstance(failed_msg, str) and failed_msg else "response.failed"
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+
+    if error_message:
+        return _fail({"error": {"message": error_message}}, 502)
+
+    message: Dict[str, Any] = {"role": "assistant", "content": full_text if full_text else None}
+    if tool_calls:
+        message["tool_calls"] = tool_calls
+    message = apply_reasoning_to_message(message, reasoning_summary_text, reasoning_full_text, reasoning_compat)
+    completion = {
+        "id": response_id or "chatcmpl",
+        "object": "chat.completion",
+        "created": created,
+        "model": requested_model or model,
+        "choices": [
+            {
+                "index": 0,
+                "message": message,
+                # OpenAI clients use finish_reason to detect tool invocations.
+                "finish_reason": "tool_calls" if tool_calls else "stop",
+            }
+        ],
+        **({"usage": usage_obj} if usage_obj else {}),
+    }
+    if verbose:
+        _log_json("OUT POST /v1/chat/completions", completion)
+    resp = make_response(jsonify(completion), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@openai_bp.route("/v1/completions", methods=["POST"])
+def completions() -> Response:
+    verbose = bool(current_app.config.get("VERBOSE"))
+    verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/completions\n" + raw)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": {"message": "Invalid JSON body"}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        return jsonify(err), 400
+
+    requested_model = payload.get("model")
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
+    prompt = payload.get("prompt")
+    if isinstance(prompt, list):
+        prompt = "".join([p if isinstance(p, str) else "" for p in prompt])
+    if not isinstance(prompt, str):
+        prompt = payload.get("suffix") or ""
+    stream_req = bool(payload.get("stream", False))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
+
+    messages = [{"role": "user", "content": prompt or ""}]
+    input_items = convert_chat_messages_to_responses_input(messages)
+
+    model_reasoning = extract_reasoning_from_model_name(requested_model)
+    reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
+    service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+    if tier_error is not None:
+        return tier_error
+    upstream, error_resp = start_upstream_request(
+        model,
+        input_items,
+        instructions=_instructions_for_model(model),
+        reasoning_param=reasoning_param,
+        service_tier=service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/completions", parsed)
+            except Exception:
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    created = int(time.time())
+    if upstream.status_code >= 400:
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        return jsonify(err), upstream.status_code
+
+    if stream_req:
+        if verbose:
+            print("OUT POST /v1/completions (streaming response)")
+        stream_iter = sse_translate_text(
+            upstream,
+            requested_model or model,
+            created,
+            verbose=verbose_obfuscation,
+            vlog=(print if verbose_obfuscation else None),
+            include_usage=include_usage,
+        )
+        stream_iter = _wrap_stream_logging("STREAM OUT /v1/completions", stream_iter, verbose)
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    full_text = ""
+    response_id = "cmpl"
+    usage_obj: Dict[str, int] | None = None
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        for raw_line in upstream.iter_lines(decode_unicode=False):
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data or data == "[DONE]":
+                if data == "[DONE]":
+                    break
+                continue
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
+            kind = evt.get("type")
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+
+    completion = {
+        "id": response_id or "cmpl",
+        "object": "text_completion",
+        "created": created,
+        "model": requested_model or model,
+        "choices": [
+            {"index": 0, "text": full_text, "finish_reason": "stop", "logprobs": None}
+        ],
+        **({"usage": usage_obj} if usage_obj else {}),
+    }
+    if verbose:
+        _log_json("OUT POST /v1/completions", completion)
+    resp = make_response(jsonify(completion), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@openai_bp.route("/v1/responses", methods=["POST"])
+def responses_create() -> Response:
+    """Handle ``POST /v1/responses`` by forwarding the request to the upstream API.
+
+    The JSON body is normalized with ``normalize_responses_payload`` and prepared with
+    ``prepare_responses_request_for_session``. The upstream request is always sent with
+    ``stream=True``; the client's own ``stream`` flag only decides how the result is
+    returned: relayed as server-sent events, or collapsed into a single JSON response.
+
+    Returns:
+        * 400 when the body is not valid JSON or is not a JSON object.
+        * ``exc.status_code`` when normalization raises ``ResponsesRequestError``.
+        * The error response produced by ``start_upstream_raw_request`` when it returns one.
+        * The upstream status and body when upstream answers with status >= 400.
+        * 502 when ``aggregate_response_from_sse`` returns an error object or no response object.
+        * Otherwise the streamed or aggregated upstream response.
+
+    The session's reuse state is cleared on the upstream failure paths.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/responses\n" + raw)
+        except Exception:
+            pass
+
+    # An empty body is treated as an empty JSON object.
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": {"message": "Invalid JSON body"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), 400
+
+    if not isinstance(payload, dict):
+        err = {"error": {"message": "Request body must be a JSON object"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), 400
+
+    try:
+        normalized = normalize_responses_payload(
+            payload,
+            config=current_app.config,
+            client_session_id=extract_client_session_id(request.headers),
+        )
+    except ResponsesRequestError as exc:
+        err: Dict[str, Any] = {"error": {"message": str(exc)}}
+        if exc.code:
+            err["error"]["code"] = exc.code
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), exc.status_code
+
+    if normalized.service_tier_resolution.warning_message and verbose:
+        print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+
+    # previous_response_id reuse is disabled here, so the prepared payload keeps its full input.
+    prepared = prepare_responses_request_for_session(
+        normalized.session_id,
+        normalized.payload,
+        allow_previous_response_id=False,
+    )
+    # Remember what the client asked for, then force streaming on the upstream request.
+    stream_req = bool(prepared.payload.get("stream", False))
+    upstream_payload = dict(prepared.payload)
+    upstream_payload["stream"] = True
+    upstream, error_resp = start_upstream_raw_request(
+        upstream_payload,
+        session_id=normalized.session_id,
+        stream=True,
+    )
+    if error_resp is not None:
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            # Logging is best-effort: log parsed JSON when possible, the raw text otherwise.
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/responses", parsed)
+            except Exception:
+                pass
+        return error_resp
+
+    record_rate_limits_from_response(upstream)
+
+    # Upstream rejected the request: relay its body and status, then drop reuse state.
+    if upstream.status_code >= 400:
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"error": {"message": upstream.text}}
+        except Exception:
+            err_body = {"error": {"message": upstream.text or "Upstream error"}}
+        finally:
+            upstream.close()
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            _log_json("OUT POST /v1/responses", err_body)
+        resp = make_response(jsonify(err_body), upstream.status_code)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    # Client asked for streaming: relay upstream bytes and feed each event to the session tracker.
+    if stream_req:
+        if verbose:
+            print("OUT POST /v1/responses (streaming response)")
+        stream_iter = _wrap_stream_logging(
+            "STREAM OUT /v1/responses",
+            stream_upstream_bytes(
+                upstream,
+                on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+            ),
+            verbose,
+        )
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    # Non-streaming client. If upstream answered with plain JSON, return it directly.
+    content_type = upstream.headers.get("Content-Type", "")
+    if "application/json" in content_type.lower():
+        try:
+            body = upstream.json()
+        except Exception:
+            body = None
+        finally:
+            upstream.close()
+        if isinstance(body, dict):
+            note_responses_final_response(normalized.session_id, body)
+            if verbose:
+                _log_json("OUT POST /v1/responses", body)
+            resp = make_response(jsonify(body), upstream.status_code)
+            for k, v in build_cors_headers().items():
+                resp.headers.setdefault(k, v)
+            return resp
+
+    # Otherwise collapse the upstream event stream into one response object.
+    response_obj, error_obj = aggregate_response_from_sse(
+        upstream,
+        on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+    )
+    if error_obj is not None:
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            _log_json("OUT POST /v1/responses", error_obj)
+        resp = make_response(jsonify(error_obj), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    if response_obj is None:
+        clear_responses_reuse_state(normalized.session_id)
+        err = {"error": {"message": "Upstream response stream did not contain a completed response object"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        resp = make_response(jsonify(err), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+
+    if verbose:
+        _log_json("OUT POST /v1/responses", response_obj)
+    resp = make_response(jsonify(response_obj), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+
+
+@openai_bp.route("/v1/models", methods=["GET"])
+def list_models() -> Response:
+    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
+    model_ids = list_public_models(expose_reasoning_models=expose_variants)
+    data = [{"id": mid, "object": "model", "owned_by": "owner"} for mid in model_ids]
+    models = {"object": "list", "data": data}
+    resp = make_response(jsonify(models), 200)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
diff --git a/chatmock/session.py b/chatmock/session.py
new file mode 100644
index 0000000000000000000000000000000000000000..705a50cdf94586cf0af4e8c98c418a1a07c3b31f
--- /dev/null
+++ b/chatmock/session.py
@@ -0,0 +1,312 @@
+from __future__ import annotations
+
+import copy
+import hashlib
+import json
+import threading
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, Dict, List
+
+
+# Held by the public functions of this module while they touch the caches below.
+_LOCK = threading.Lock()
+# Prompt-prefix fingerprint -> generated session UUID; _ORDER records insertion order.
+_FINGERPRINT_TO_UUID: Dict[str, str] = {}
+_ORDER: List[str] = []
+# Size cap for both caches; the oldest entry is dropped once the cap is exceeded.
+_MAX_ENTRIES = 10000
+# Session id -> Responses reuse state; _RESPONSES_ORDER records insertion order.
+_RESPONSES_SESSION_STATE: Dict[str, "_ResponsesSessionState"] = {}
+_RESPONSES_ORDER: List[str] = []
+
+
+@dataclass(frozen=True)
+class PreparedResponsesRequest:
+    """Immutable result of ``prepare_responses_request_for_session``."""
+
+    # Payload to send upstream; its input may have been trimmed and previous_response_id set.
+    payload: Dict[str, Any]
+    # Session the payload was prepared for.
+    session_id: str
+
+
+@dataclass
+class _ResponsesSessionState:
+    """Mutable per-session bookkeeping used to reuse a previous Responses exchange."""
+
+    # Full payload of the last request whose response completed.
+    last_request_payload: Dict[str, Any] | None = None
+    # Id of that completed response; used as previous_response_id when a request is trimmed.
+    last_response_id: str | None = None
+    # Output items of that response, with "reasoning" items filtered out.
+    last_response_items: List[Dict[str, Any]] = field(default_factory=list)
+    # Full payload of the request that is currently awaiting its response.
+    inflight_request_payload: Dict[str, Any] | None = None
+    # Whether the in-flight result may be promoted to the last_* fields on completion.
+    inflight_track_result: bool = False
+    # Response id and finished output items collected for the in-flight request.
+    inflight_response_id: str | None = None
+    inflight_response_items: List[Dict[str, Any]] = field(default_factory=list)
+
+
+def _canonicalize_first_user_message(input_items: List[Dict[str, Any]]) -> Dict[str, Any] | None:
+    """
+    Extract the first stable user message from Responses input items. Good use for a fingerprint for prompt caching.
+    """
+    for item in input_items:
+        if not isinstance(item, dict):
+            continue
+        if item.get("type") != "message":
+            continue
+        role = item.get("role")
+        if role != "user":
+            continue
+        content = item.get("content")
+        if not isinstance(content, list):
+            continue
+        norm_content = []
+        for part in content:
+            if not isinstance(part, dict):
+                continue
+            ptype = part.get("type")
+            if ptype == "input_text":
+                text = part.get("text") if isinstance(part.get("text"), str) else ""
+                if text:
+                    norm_content.append({"type": "input_text", "text": text})
+            elif ptype == "input_image":
+                url = part.get("image_url") if isinstance(part.get("image_url"), str) else None
+                if url:
+                    norm_content.append({"type": "input_image", "image_url": url})
+        if norm_content:
+            return {"type": "message", "role": "user", "content": norm_content}
+    return None
+
+
+def canonicalize_prefix(instructions: str | None, input_items: List[Dict[str, Any]]) -> str:
+    prefix: Dict[str, Any] = {}
+    if isinstance(instructions, str) and instructions.strip():
+        prefix["instructions"] = instructions.strip()
+    first_user = _canonicalize_first_user_message(input_items)
+    if first_user is not None:
+        prefix["first_user_message"] = first_user
+    return json.dumps(prefix, sort_keys=True, separators=(",", ":"))
+
+
+def _fingerprint(s: str) -> str:
+    return hashlib.sha256(s.encode("utf-8")).hexdigest()
+
+
+def _remember(fp: str, sid: str) -> None:
+    if fp in _FINGERPRINT_TO_UUID:
+        return
+    _FINGERPRINT_TO_UUID[fp] = sid
+    _ORDER.append(fp)
+    if len(_ORDER) > _MAX_ENTRIES:
+        oldest = _ORDER.pop(0)
+        _FINGERPRINT_TO_UUID.pop(oldest, None)
+
+
+def _remember_responses_session(session_id: str) -> _ResponsesSessionState:
+    state = _RESPONSES_SESSION_STATE.get(session_id)
+    if state is None:
+        state = _ResponsesSessionState()
+        _RESPONSES_SESSION_STATE[session_id] = state
+        _RESPONSES_ORDER.append(session_id)
+        if len(_RESPONSES_ORDER) > _MAX_ENTRIES:
+            oldest = _RESPONSES_ORDER.pop(0)
+            _RESPONSES_SESSION_STATE.pop(oldest, None)
+    return state
+
+
+def _request_without_input(payload: Dict[str, Any]) -> Dict[str, Any]:
+    clone = copy.deepcopy(payload)
+    clone["input"] = []
+    clone.pop("previous_response_id", None)
+    return clone
+
+
+def _input_list(payload: Dict[str, Any]) -> List[Dict[str, Any]] | None:
+    raw = payload.get("input")
+    if not isinstance(raw, list):
+        return None
+    return [item for item in copy.deepcopy(raw) if isinstance(item, dict)]
+
+
+def _conversation_output_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    reusable: List[Dict[str, Any]] = []
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+        item_type = item.get("type")
+        if item_type == "reasoning":
+            continue
+        reusable.append(copy.deepcopy(item))
+    return reusable
+
+
+def _clear_reuse_state(state: _ResponsesSessionState) -> None:
+    state.last_request_payload = None
+    state.last_response_id = None
+    state.last_response_items = []
+    state.inflight_request_payload = None
+    state.inflight_track_result = False
+    state.inflight_response_id = None
+    state.inflight_response_items = []
+
+
+def _clear_inflight(state: _ResponsesSessionState) -> None:
+    state.inflight_request_payload = None
+    state.inflight_track_result = False
+    state.inflight_response_id = None
+    state.inflight_response_items = []
+
+
+def ensure_session_id(
+    instructions: str | None,
+    input_items: List[Dict[str, Any]],
+    client_supplied: str | None = None,
+) -> str:
+    if isinstance(client_supplied, str) and client_supplied.strip():
+        return client_supplied.strip()
+
+    canon = canonicalize_prefix(instructions, input_items)
+    fp = _fingerprint(canon)
+    with _LOCK:
+        if fp in _FINGERPRINT_TO_UUID:
+            return _FINGERPRINT_TO_UUID[fp]
+        sid = str(uuid.uuid4())
+        _remember(fp, sid)
+        return sid
+
+
+def prepare_responses_request_for_session(
+    session_id: str,
+    payload: Dict[str, Any],
+    *,
+    allow_previous_response_id: bool = True,
+) -> PreparedResponsesRequest:
+    """Build the outbound payload for ``session_id`` and record the request as in flight.
+
+    ``payload`` is never mutated; the function works on deep copies.
+
+    * If ``payload`` already carries a non-blank string ``previous_response_id``, the
+      session's reuse state is cleared and the payload is returned as given.
+    * Otherwise, when ``allow_previous_response_id`` is true, a completed exchange is on
+      record, the request equals the previous one apart from ``input`` and
+      ``previous_response_id``, and the new input starts with the previous input followed
+      by the previous output items, the outbound input is trimmed to the new suffix and
+      ``previous_response_id`` is set to the recorded response id.
+    * In the non-explicit case the full payload is stored as the in-flight request.
+
+    Returns:
+        A ``PreparedResponsesRequest`` holding the outbound payload and ``session_id``.
+    """
+    # full_payload is what gets remembered; outbound_payload is what may be trimmed and sent.
+    full_payload = copy.deepcopy(payload)
+    outbound_payload = copy.deepcopy(payload)
+    explicit_previous_response_id = (
+        isinstance(full_payload.get("previous_response_id"), str)
+        and bool(full_payload.get("previous_response_id").strip())
+    )
+
+    with _LOCK:
+        state = _remember_responses_session(session_id)
+
+        # The caller chains responses itself: forget our own state and do not track this request.
+        if explicit_previous_response_id:
+            _clear_reuse_state(state)
+            return PreparedResponsesRequest(
+                payload=outbound_payload,
+                session_id=session_id,
+            )
+
+        request_input = _input_list(full_payload)
+        if (
+            allow_previous_response_id
+            and
+            state.last_request_payload is not None
+            and state.last_response_id
+            and request_input is not None
+            and _request_without_input(state.last_request_payload) == _request_without_input(full_payload)
+        ):
+            # Baseline = previous input followed by the previous response's output items.
+            baseline: List[Dict[str, Any]] = []
+            previous_input = _input_list(state.last_request_payload)
+            if previous_input is not None:
+                baseline.extend(previous_input)
+            baseline.extend(copy.deepcopy(state.last_response_items))
+            baseline_len = len(baseline)
+            # Only the items after the baseline are sent when the new input extends it.
+            if request_input[:baseline_len] == baseline and baseline_len <= len(request_input):
+                outbound_payload["input"] = copy.deepcopy(request_input[baseline_len:])
+                outbound_payload["previous_response_id"] = state.last_response_id
+
+        # Track the untrimmed request so a later completion can promote it to last_*.
+        state.inflight_request_payload = full_payload
+        state.inflight_track_result = True
+        state.inflight_response_id = None
+        state.inflight_response_items = []
+
+    return PreparedResponsesRequest(
+        payload=outbound_payload,
+        session_id=session_id,
+    )
+
+
+def note_responses_stream_event(session_id: str, event: Dict[str, Any]) -> None:
+    """Update the reuse state of ``session_id`` from one upstream stream event.
+
+    Handled event types:
+
+    * ``response.created``: remember the response id.
+    * ``response.output_item.done``: collect the finished output item.
+    * ``response.completed``: promote the in-flight request to the ``last_*`` fields when
+      it is tracked and a response id is known, otherwise reset the ``last_*`` fields;
+      the in-flight fields are cleared either way.
+    * ``response.failed`` / ``error``: clear all reuse state.
+
+    Blank or non-string session ids, non-dict events, unknown sessions and other event
+    types are ignored.
+    """
+    if not isinstance(session_id, str) or not session_id.strip():
+        return
+    if not isinstance(event, dict):
+        return
+
+    with _LOCK:
+        # Only sessions that were registered earlier are updated; nothing is created here.
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+
+        kind = event.get("type")
+        if kind == "response.created":
+            response = event.get("response")
+            if isinstance(response, dict) and isinstance(response.get("id"), str):
+                state.inflight_response_id = response.get("id")
+            return
+
+        if kind == "response.output_item.done":
+            item = event.get("item")
+            if isinstance(item, dict):
+                state.inflight_response_items.append(copy.deepcopy(item))
+            return
+
+        if kind == "response.completed":
+            response = event.get("response")
+            response_id = None
+            # Start from the items collected so far; a non-empty final "output" list replaces them.
+            response_items: List[Dict[str, Any]] = copy.deepcopy(state.inflight_response_items)
+            if isinstance(response, dict):
+                if isinstance(response.get("id"), str):
+                    response_id = response.get("id")
+                output = response.get("output")
+                if isinstance(output, list) and output:
+                    response_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)]
+            # Fall back to the id seen on response.created.
+            if not response_id:
+                response_id = state.inflight_response_id
+
+            if state.inflight_track_result and state.inflight_request_payload is not None and response_id:
+                state.last_request_payload = copy.deepcopy(state.inflight_request_payload)
+                state.last_response_id = response_id
+                state.last_response_items = _conversation_output_items(response_items)
+            else:
+                state.last_request_payload = None
+                state.last_response_id = None
+                state.last_response_items = []
+            _clear_inflight(state)
+            return
+
+        if kind in ("response.failed", "error"):
+            _clear_reuse_state(state)
+
+
+def note_responses_final_response(session_id: str, response_obj: Dict[str, Any]) -> None:
+    if not isinstance(session_id, str) or not session_id.strip():
+        return
+    if not isinstance(response_obj, dict):
+        return
+
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+
+        response_id = response_obj.get("id") if isinstance(response_obj.get("id"), str) else None
+        output = response_obj.get("output")
+        output_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)] if isinstance(output, list) else []
+        if state.inflight_track_result and state.inflight_request_payload is not None and response_id:
+            state.last_request_payload = copy.deepcopy(state.inflight_request_payload)
+            state.last_response_id = response_id
+            state.last_response_items = _conversation_output_items(output_items)
+        else:
+            state.last_request_payload = None
+            state.last_response_id = None
+            state.last_response_items = []
+        _clear_inflight(state)
+
+
+def clear_responses_reuse_state(session_id: str) -> None:
+    if not isinstance(session_id, str) or not session_id.strip():
+        return
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+        _clear_reuse_state(state)
+
+
+def reset_session_state() -> None:
+    with _LOCK:
+        _FINGERPRINT_TO_UUID.clear()
+        _ORDER.clear()
+        _RESPONSES_SESSION_STATE.clear()
+        _RESPONSES_ORDER.clear()
diff --git a/chatmock/transform.py b/chatmock/transform.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c611fb10adc27c04e630c2cd5f493c6a655bf89
--- /dev/null
+++ b/chatmock/transform.py
@@ -0,0 +1,149 @@
+from __future__ import annotations
+
+import json
+from typing import Any, Dict, List
+
+
+def to_data_url(image_str: str) -> str:
+    if not isinstance(image_str, str) or not image_str:
+        return image_str
+    s = image_str.strip()
+    if s.startswith("data:image/"):
+        return s
+    if s.startswith("http://") or s.startswith("https://"):
+        return s
+    b64 = s.replace("\n", "").replace("\r", "")
+    kind = "image/png"
+    if b64.startswith("/9j/"):
+        kind = "image/jpeg"
+    elif b64.startswith("iVBORw0KGgo"):
+        kind = "image/png"
+    elif b64.startswith("R0lGOD"):
+        kind = "image/gif"
+    return f"data:{kind};base64,{b64}"
+
+
+def convert_ollama_messages(
+    messages: List[Dict[str, Any]] | None, top_images: List[str] | None
+) -> List[Dict[str, Any]]:
+    out: List[Dict[str, Any]] = []
+    msgs = messages if isinstance(messages, list) else []
+    pending_call_ids: List[str] = []
+    call_counter = 0
+    for m in msgs:
+        if not isinstance(m, dict):
+            continue
+        role = m.get("role") or "user"
+        nm: Dict[str, Any] = {"role": role}
+
+        content = m.get("content")
+        images = m.get("images") if isinstance(m.get("images"), list) else []
+        parts: List[Dict[str, Any]] = []
+        if isinstance(content, list):
+            for p in content:
+                if isinstance(p, dict) and p.get("type") == "text" and isinstance(p.get("text"), str):
+                    parts.append({"type": "text", "text": p.get("text")})
+        elif isinstance(content, str):
+            parts.append({"type": "text", "text": content})
+        for img in images:
+            url = to_data_url(img)
+            if isinstance(url, str) and url:
+                parts.append({"type": "image_url", "image_url": {"url": url}})
+        if parts:
+            nm["content"] = parts
+
+        if role == "assistant" and isinstance(m.get("tool_calls"), list):
+            tcs = []
+            for tc in m.get("tool_calls"):
+                if not isinstance(tc, dict):
+                    continue
+                fn = tc.get("function") if isinstance(tc.get("function"), dict) else {}
+                name = fn.get("name") if isinstance(fn.get("name"), str) else None
+                args = fn.get("arguments")
+                if name is None:
+                    continue
+                call_id = tc.get("id") or tc.get("call_id")
+                if not isinstance(call_id, str) or not call_id:
+                    call_counter += 1
+                    call_id = f"ollama_call_{call_counter}"
+                pending_call_ids.append(call_id)
+                tcs.append(
+                    {
+                        "id": call_id,
+                        "type": "function",
+                        "function": {
+                            "name": name,
+                            "arguments": args if isinstance(args, str) else (json.dumps(args) if isinstance(args, dict) else "{}"),
+                        },
+                    }
+                )
+            if tcs:
+                nm["tool_calls"] = tcs
+
+        if role == "tool":
+            tci = m.get("tool_call_id") or m.get("id")
+            if not isinstance(tci, str) or not tci:
+                if pending_call_ids:
+                    tci = pending_call_ids.pop(0)
+            if isinstance(tci, str) and tci:
+                nm["tool_call_id"] = tci
+
+            if not parts and isinstance(content, str):
+                nm["content"] = content
+
+        out.append(nm)
+
+    if isinstance(top_images, list) and top_images:
+        attach_to = None
+        for i in range(len(out) - 1, -1, -1):
+            if out[i].get("role") == "user":
+                attach_to = out[i]
+                break
+        if attach_to is None:
+            attach_to = {"role": "user", "content": []}
+            out.append(attach_to)
+        attach_to.setdefault("content", [])
+        for img in top_images:
+            url = to_data_url(img)
+            if isinstance(url, str) and url:
+                attach_to["content"].append({"type": "image_url", "image_url": {"url": url}})
+    return out
+
+
+def normalize_ollama_tools(tools: List[Dict[str, Any]] | None) -> List[Dict[str, Any]]:
+    out: List[Dict[str, Any]] = []
+    if not isinstance(tools, list):
+        return out
+    for t in tools:
+        if not isinstance(t, dict):
+            continue
+        if isinstance(t.get("function"), dict):
+            fn = t.get("function")
+            name = fn.get("name") if isinstance(fn.get("name"), str) else None
+            if not name:
+                continue
+            out.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "description": fn.get("description") or "",
+                        "parameters": fn.get("parameters") if isinstance(fn.get("parameters"), dict) else {"type": "object", "properties": {}},
+                    },
+                }
+            )
+            continue
+        name = t.get("name") if isinstance(t.get("name"), str) else None
+        if name:
+            out.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "description": t.get("description") or "",
+                        "parameters": {"type": "object", "properties": {}},
+                    },
+                }
+            )
+    return out
+
diff --git a/chatmock/upstream.py b/chatmock/upstream.py
new file mode 100644
index 0000000000000000000000000000000000000000..ba995cb25b08a74e8f542d5eb824693fb35196c4
--- /dev/null
+++ b/chatmock/upstream.py
@@ -0,0 +1,181 @@
+from __future__ import annotations
+
+import json
+import time
+from typing import Any, Dict, List, Tuple
+from urllib.parse import urlparse, urlunparse
+
+import requests
+from flask import Response, current_app, jsonify, make_response
+
+from .config import CHATGPT_RESPONSES_URL
+from .http import build_cors_headers
+from .model_registry import normalize_model_name
+from .session import ensure_session_id
+from flask import request as flask_request
+from .utils import get_effective_chatgpt_auth
+
+
+def _log_json(prefix: str, payload: Any) -> None:
+    try:
+        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+    except Exception:
+        try:
+            print(f"{prefix}\n{payload}")
+        except Exception:
+            pass
+
+def start_upstream_request(
+    model: str,
+    input_items: List[Dict[str, Any]],
+    *,
+    instructions: str | None = None,
+    tools: List[Dict[str, Any]] | None = None,
+    tool_choice: Any | None = None,
+    parallel_tool_calls: bool = False,
+    reasoning_param: Dict[str, Any] | None = None,
+    service_tier: str | None = None,
+):
+    access_token, account_id = get_effective_chatgpt_auth()
+    if not access_token or not account_id:
+        resp = make_response(
+            jsonify(
+                {
+                    "error": {
+                        "message": "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+                    }
+                }
+            ),
+            401,
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return None, resp
+
+    include: List[str] = []
+    if isinstance(reasoning_param, dict):
+        include.append("reasoning.encrypted_content")
+
+    client_session_id = None
+    try:
+        client_session_id = (
+            flask_request.headers.get("X-Session-Id")
+            or flask_request.headers.get("session_id")
+            or None
+        )
+    except Exception:
+        client_session_id = None
+    session_id = ensure_session_id(instructions, input_items, client_session_id)
+
+    responses_payload = {
+        "model": model,
+        "instructions": instructions if isinstance(instructions, str) and instructions.strip() else instructions,
+        "input": input_items,
+        "tools": tools or [],
+        "tool_choice": tool_choice if tool_choice in ("auto", "none") or isinstance(tool_choice, dict) else "auto",
+        "parallel_tool_calls": bool(parallel_tool_calls),
+        "store": False,
+        "stream": True,
+        "prompt_cache_key": session_id,
+    }
+    if include:
+        responses_payload["include"] = include
+
+    if reasoning_param is not None:
+        responses_payload["reasoning"] = reasoning_param
+    if isinstance(service_tier, str) and service_tier.strip():
+        responses_payload["service_tier"] = service_tier.strip().lower()
+
+    return start_upstream_raw_request(
+        responses_payload,
+        session_id=session_id,
+        stream=True,
+    )
+
+
+def build_upstream_headers(
+    access_token: str,
+    account_id: str,
+    session_id: str,
+    *,
+    accept: str = "text/event-stream",
+) -> Dict[str, str]:
+    return {
+        "Authorization": f"Bearer {access_token}",
+        "Content-Type": "application/json",
+        "Accept": accept,
+        "chatgpt-account-id": account_id,
+        "OpenAI-Beta": "responses=experimental",
+        "session_id": session_id,
+    }
+
+
+def start_upstream_raw_request(
+    responses_payload: Dict[str, Any],
+    *,
+    session_id: str | None = None,
+    stream: bool = True,
+):
+    access_token, account_id = get_effective_chatgpt_auth()
+    if not access_token or not account_id:
+        resp = make_response(
+            jsonify(
+                {
+                    "error": {
+                        "message": "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+                    }
+                }
+            ),
+            401,
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return None, resp
+
+    effective_session_id = session_id
+    if not isinstance(effective_session_id, str) or not effective_session_id.strip():
+        payload_prompt_cache_key = responses_payload.get("prompt_cache_key")
+        if isinstance(payload_prompt_cache_key, str) and payload_prompt_cache_key.strip():
+            effective_session_id = payload_prompt_cache_key.strip()
+    if not isinstance(effective_session_id, str) or not effective_session_id.strip():
+        effective_session_id = str(int(time.time() * 1000))
+
+    verbose = False
+    try:
+        verbose = bool(current_app.config.get("VERBOSE"))
+    except Exception:
+        verbose = False
+    if verbose:
+        _log_json("OUTBOUND >> ChatGPT Responses API payload", responses_payload)
+
+    headers = build_upstream_headers(
+        access_token,
+        account_id,
+        effective_session_id,
+        accept=("text/event-stream" if stream else "application/json"),
+    )
+
+    try:
+        upstream = requests.post(
+            CHATGPT_RESPONSES_URL,
+            headers=headers,
+            json=responses_payload,
+            stream=stream,
+            timeout=600,
+        )
+    except requests.RequestException as e:
+        resp = make_response(jsonify({"error": {"message": f"Upstream ChatGPT request failed: {e}"}}), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return None, resp
+    return upstream, None
+
+
+def build_upstream_websocket_url() -> str:
+    parsed = urlparse(CHATGPT_RESPONSES_URL)
+    scheme = parsed.scheme.lower()
+    if scheme == "https":
+        parsed = parsed._replace(scheme="wss")
+    elif scheme == "http":
+        parsed = parsed._replace(scheme="ws")
+    return urlunparse(parsed)
diff --git a/chatmock/utils.py b/chatmock/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..79703a5dc3c493e304169c6dcab720e75eb0ccdf
--- /dev/null
+++ b/chatmock/utils.py
@@ -0,0 +1,874 @@
+from __future__ import annotations
+
+import base64
+import datetime
+import hashlib
+import json
+import os
+import secrets
+import sys
+from typing import Any, Dict, List, Optional, Tuple
+
+import requests
+
+from .config import CLIENT_ID_DEFAULT, OAUTH_TOKEN_URL
+
+
+def eprint(*args, **kwargs) -> None:  # print() wrapper whose output always goes to sys.stderr
+    print(*args, file=sys.stderr, **kwargs)
+
+
+def get_home_dir() -> str:
+    """Return the auth directory: ``$CHATGPT_LOCAL_HOME``, else ``$CODEX_HOME``, else ``~/.chatgpt-local``."""
+    env_dirs = (os.getenv(name) for name in ("CHATGPT_LOCAL_HOME", "CODEX_HOME"))
+    # Empty values count as unset, so they fall through to the per-user default.
+    return next((path for path in env_dirs if path), None) or os.path.expanduser("~/.chatgpt-local")
+
+
+def read_auth_file() -> Dict[str, Any] | None:
+    """Load the first readable ``auth.json`` from the known auth directories.
+
+    Directories are tried in order: ``$CHATGPT_LOCAL_HOME``, ``$CODEX_HOME``,
+    ``~/.chatgpt-local``, ``~/.codex``. A candidate that is missing, unreadable
+    or not valid JSON is skipped. Returns the parsed JSON, or None if none load.
+    """
+    env_dirs = [os.getenv("CHATGPT_LOCAL_HOME"), os.getenv("CODEX_HOME")]
+    user_dirs = [os.path.expanduser("~/.chatgpt-local"), os.path.expanduser("~/.codex")]
+    for directory in env_dirs + user_dirs:
+        if not directory:
+            continue
+        try:
+            with open(os.path.join(directory, "auth.json"), "r", encoding="utf-8") as handle:
+                return json.load(handle)
+        except Exception:  # best-effort: any failure just moves on to the next candidate
+            continue
+    return None
+
+
+def write_auth_file(auth: Dict[str, Any]) -> bool:
+    """Write ``auth`` to ``auth.json`` in the auth home, created owner-only (0o600); True, or False after logging."""
+    home = get_home_dir()
+    try:
+        os.makedirs(home, exist_ok=True)
+    except Exception as exc:
+        eprint(f"ERROR: unable to create auth home directory {home}: {exc}")
+        return False
+    try:
+        with open(os.path.join(home, "auth.json"), "w", encoding="utf-8", opener=lambda p, fl: os.open(p, fl, 0o600)) as fp:
+            if hasattr(os, "fchmod"):  # the opener only covers creation; this tightens a pre-existing file
+                os.fchmod(fp.fileno(), 0o600)
+            json.dump(auth, fp, indent=2)
+        return True
+    except Exception as exc:
+        eprint(f"ERROR: unable to write auth file: {exc}")
+        return False
+
+
+def parse_jwt_claims(token: str) -> Dict[str, Any] | None:
+    """Decode the payload of a JWT WITHOUT verifying its signature; None unless it is a JSON object."""
+    try:
+        # Unpacking fails unless there are exactly three segments; non-str tokens fail here as well.
+        _, payload, _ = token.split(".")
+        raw = base64.urlsafe_b64decode((payload + "=" * (-len(payload) % 4)).encode())
+        claims = json.loads(raw.decode())
+    except Exception:
+        return None
+    return claims if isinstance(claims, dict) else None
+
+
+def generate_pkce() -> "PkceCodes":
+    """Create a random PKCE verifier and its unpadded base64url SHA-256 challenge."""
+    from .models import PkceCodes
+
+    verifier = secrets.token_hex(64)
+    challenge = base64.urlsafe_b64encode(hashlib.sha256(verifier.encode()).digest()).rstrip(b"=")
+    return PkceCodes(code_verifier=verifier, code_challenge=challenge.decode())
+
+
+def convert_chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Translate Chat Completions ``messages`` into Responses API ``input`` items.
+
+    System messages are dropped; tool results, assistant tool calls and text/image content become
+    ``function_call_output``, ``function_call`` and ``message`` items. Non-dict entries are skipped.
+    """
+    def _normalize_image_data_url(url: str) -> str:
+        """Normalise a base64 ``data:image/`` URL (unquote, standard alphabet, padding); else return ``url``."""
+        try:
+            if not isinstance(url, str) or not url.startswith("data:image/") or ";base64," not in url:
+                return url
+            header, data = url.split(",", 1)
+            try:
+                from urllib.parse import unquote
+                data = unquote(data)
+            except Exception:
+                pass
+            data = data.strip().replace("\n", "").replace("\r", "")
+            data = data.replace("-", "+").replace("_", "/")  # URL-safe alphabet -> standard alphabet
+            data += "=" * (-len(data) % 4)
+            try:
+                base64.b64decode(data, validate=True)
+            except Exception:
+                return url  # still not valid base64: hand the caller's URL back untouched
+            return f"{header},{data}"
+        except Exception:
+            return url
+
+    input_items: List[Dict[str, Any]] = []
+    for message in messages:
+        if not isinstance(message, dict):
+            continue  # malformed entry: skip it instead of raising AttributeError on .get()
+        role = message.get("role")
+        if role == "system":
+            continue
+        if role == "tool":
+            call_id = message.get("tool_call_id") or message.get("id")
+            if isinstance(call_id, str) and call_id:
+                content = message.get("content", "")
+                if isinstance(content, list):
+                    texts = []
+                    for part in content:
+                        if isinstance(part, dict):
+                            t = part.get("text") or part.get("content")
+                            if isinstance(t, str) and t:
+                                texts.append(t)
+                    content = "\n".join(texts)
+                if isinstance(content, str):
+                    input_items.append(
+                        {
+                            "type": "function_call_output",
+                            "call_id": call_id,
+                            "output": content,
+                        }
+                    )
+            continue
+        if role == "assistant" and isinstance(message.get("tool_calls"), list):
+            for tc in message.get("tool_calls") or []:
+                if not isinstance(tc, dict):
+                    continue
+                tc_type = tc.get("type", "function")
+                if tc_type != "function":
+                    continue
+                call_id = tc.get("id") or tc.get("call_id")
+                fn = tc.get("function") if isinstance(tc.get("function"), dict) else {}
+                name = fn.get("name") if isinstance(fn, dict) else None
+                args = fn.get("arguments") if isinstance(fn, dict) else None
+                if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                    input_items.append(
+                        {
+                            "type": "function_call",
+                            "name": name,
+                            "arguments": args,
+                            "call_id": call_id,
+                        }
+                    )
+
+        content = message.get("content", "")
+        content_items: List[Dict[str, Any]] = []
+        if isinstance(content, list):
+            for part in content:
+                if not isinstance(part, dict):
+                    continue
+                ptype = part.get("type")
+                if ptype == "text":
+                    text = part.get("text") or part.get("content") or ""
+                    if isinstance(text, str) and text:
+                        kind = "output_text" if role == "assistant" else "input_text"
+                        content_items.append({"type": kind, "text": text})
+                elif ptype == "image_url":
+                    image = part.get("image_url")
+                    url = image.get("url") if isinstance(image, dict) else image
+                    if isinstance(url, str) and url:
+                        content_items.append({"type": "input_image", "image_url": _normalize_image_data_url(url)})
+        elif isinstance(content, str) and content:
+            kind = "output_text" if role == "assistant" else "input_text"
+            content_items.append({"type": kind, "text": content})
+
+        if not content_items:
+            continue
+        role_out = "assistant" if role == "assistant" else "user"
+        input_items.append({"type": "message", "role": role_out, "content": content_items})
+    return input_items
+
+
+def convert_tools_chat_to_responses(tools: Any) -> List[Dict[str, Any]]:
+    """Convert Chat Completions ``tools`` into flat Responses API function-tool entries.
+
+    Only dict entries with ``type == "function"`` and a non-empty string name are kept; non-dict
+    ``parameters`` become an empty object schema. Anything other than a list yields ``[]``.
+    """
+    converted: List[Dict[str, Any]] = []
+    for tool in (tools if isinstance(tools, list) else []):
+        if not isinstance(tool, dict) or tool.get("type") != "function":
+            continue
+        spec = tool.get("function")
+        if not isinstance(spec, dict):
+            spec = {}
+        tool_name = spec.get("name")
+        if not (isinstance(tool_name, str) and tool_name):
+            continue
+        schema = spec.get("parameters")
+        if not isinstance(schema, dict):
+            schema = {"type": "object", "properties": {}}
+        converted.append({
+            "type": "function",
+            "name": tool_name,
+            "description": spec.get("description") or "",
+            "strict": False,
+            "parameters": schema,
+        })
+    return converted
+
+
+def load_chatgpt_tokens(ensure_fresh: bool = True) -> tuple[str | None, str | None, str | None]:
+    """Return ``(access_token, account_id, id_token)`` from the stored auth file.
+
+    With ``ensure_fresh`` set, a refresh token stored and ``CLIENT_ID_DEFAULT`` configured, an access
+    token that is missing or flagged by ``_should_refresh_access_token`` is refreshed and the merged
+    tokens are handed to ``_persist_refreshed_auth``. A missing account id is derived from the id
+    token. Each slot is a non-empty string or None; all are None when no auth dict can be read.
+    """
+    auth = read_auth_file()
+    if not isinstance(auth, dict):
+        return None, None, None
+
+    def _text(value: Any) -> bool:  # True only for a non-empty str
+        return isinstance(value, str) and bool(value)
+
+    stored = auth.get("tokens")
+    tokens = stored if isinstance(stored, dict) else {}
+    token_keys = ("access_token", "id_token", "refresh_token", "account_id")
+    current = {key: tokens.get(key) for key in token_keys}
+
+    if ensure_fresh and _text(current["refresh_token"]) and CLIENT_ID_DEFAULT:
+        stale = _should_refresh_access_token(current["access_token"], auth.get("last_refresh"))
+        if stale or not _text(current["access_token"]):
+            refreshed = _refresh_chatgpt_tokens(current["refresh_token"], CLIENT_ID_DEFAULT)
+            if refreshed:
+                merged = dict(tokens)
+                # Refreshed values win; fields the refresh did not supply keep their stored value.
+                for key in token_keys:
+                    current[key] = refreshed.get(key) or current[key]
+                    if _text(current[key]):
+                        merged[key] = current[key]
+                # The in-memory tokens are used even when writing them back fails.
+                _persist_refreshed_auth(auth, merged)
+
+    account_id = current["account_id"]
+    if not _text(account_id):
+        # Fall back to the account id carried inside the id token, if any.
+        account_id = _derive_account_id(current["id_token"])
+
+    access_token, id_token = current["access_token"], current["id_token"]
+    return (
+        access_token if _text(access_token) else None,
+        account_id if _text(account_id) else None,
+        id_token if _text(id_token) else None,
+    )
+
+
+def _should_refresh_access_token(access_token: Optional[str], last_refresh: Any) -> bool:
+    """Decide whether the access token should be refreshed now.
+
+    True when the token is missing, or its JWT ``exp`` is no more than five minutes away. Without a
+    usable ``exp``, True only when ``last_refresh`` parses and is at least 55 minutes old.
+    """
+    if not (isinstance(access_token, str) and access_token):
+        return True
+    utc = datetime.timezone.utc
+    claims = parse_jwt_claims(access_token) or {}
+    exp = claims.get("exp") if isinstance(claims, dict) else None
+    now = datetime.datetime.now(utc)
+    if isinstance(exp, (int, float)):
+        try:
+            return datetime.datetime.fromtimestamp(float(exp), utc) <= now + datetime.timedelta(minutes=5)
+        except (OverflowError, OSError, ValueError):
+            pass  # unusable ``exp``: fall through to the last-refresh check
+    refreshed_at = _parse_iso8601(last_refresh) if isinstance(last_refresh, str) else None
+    cutoff = now - datetime.timedelta(minutes=55)
+    return refreshed_at is not None and refreshed_at <= cutoff
+
+
+def _refresh_chatgpt_tokens(refresh_token: str, client_id: str) -> Optional[Dict[str, Optional[str]]]:
+    """Exchange ``refresh_token`` at ``OAUTH_TOKEN_URL`` for a fresh token set.
+
+    Returns the id, access and refresh tokens plus the account id derived from the id token, or None
+    (after logging to stderr) on a transport error, an HTTP status >= 400 or an unusable body.
+    """
+    payload = {
+        "grant_type": "refresh_token",
+        "refresh_token": refresh_token,
+        "client_id": client_id,
+        "scope": "openid profile email offline_access",
+    }
+
+    try:
+        resp = requests.post(OAUTH_TOKEN_URL, json=payload, timeout=30)
+    except requests.RequestException as exc:
+        eprint(f"ERROR: failed to refresh ChatGPT token: {exc}")
+        return None
+    if resp.status_code >= 400:
+        eprint(f"ERROR: refresh token request returned status {resp.status_code}")
+        return None
+    try:
+        data = resp.json()
+    except ValueError as exc:
+        eprint(f"ERROR: unable to parse refresh token response: {exc}")
+        return None
+    fields = data if isinstance(data, dict) else {}  # valid JSON that is not an object carries no tokens
+    id_token = fields.get("id_token")
+    access_token = fields.get("access_token")
+    if not isinstance(id_token, str) or not isinstance(access_token, str):
+        eprint("ERROR: refresh token response missing expected tokens")
+        return None
+    rotated = fields.get("refresh_token")
+    return {
+        "id_token": id_token,
+        "access_token": access_token,
+        "refresh_token": rotated if isinstance(rotated, str) and rotated else refresh_token,  # keep the old one
+        "account_id": _derive_account_id(id_token),
+    }
+
+
+def _persist_refreshed_auth(auth: Dict[str, Any], updated_tokens: Dict[str, Any]) -> Optional[Tuple[Dict[str, Any], Dict[str, Any]]]:
+    """Save a copy of ``auth`` with ``updated_tokens`` and a new ``last_refresh``; None (logged) if the write fails."""
+    # Build a new dict so the caller's ``auth`` mapping is not mutated.
+    snapshot = {**auth, "tokens": updated_tokens, "last_refresh": _now_iso8601()}
+    if not write_auth_file(snapshot):
+        eprint("ERROR: unable to persist refreshed auth tokens")
+        return None
+    return snapshot, updated_tokens
+
+
+def _derive_account_id(id_token: Optional[str]) -> Optional[str]:
+    """Return ``chatgpt_account_id`` from the id token's ``https://api.openai.com/auth`` claim, else None."""
+    if not (isinstance(id_token, str) and id_token):
+        return None
+    claims = parse_jwt_claims(id_token) or {}
+    if not isinstance(claims, dict):
+        return None
+    namespace = claims.get("https://api.openai.com/auth")
+    account_id = namespace.get("chatgpt_account_id") if isinstance(namespace, dict) else None
+    return account_id if isinstance(account_id, str) and account_id else None
+
+
+def _parse_iso8601(value: str) -> Optional[datetime.datetime]:
+    """Parse an ISO-8601 string (trailing ``Z`` allowed) into an aware UTC datetime; None on any failure."""
+    try:
+        text = (value[:-1] + "+00:00") if value.endswith("Z") else value
+        parsed = datetime.datetime.fromisoformat(text)
+        # A timestamp without an offset is taken to be UTC already.
+        aware = parsed if parsed.tzinfo is not None else parsed.replace(tzinfo=datetime.timezone.utc)
+        return aware.astimezone(datetime.timezone.utc)
+    except Exception:
+        return None
+
+
+def _now_iso8601() -> str:  # current UTC time as ISO-8601 text, with the "+00:00" offset written as "Z"
+    return datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
+
+
+def get_effective_chatgpt_auth() -> tuple[str | None, str | None]:
+    """Return ``(access_token, account_id)``, deriving a missing account id from the id token."""
+    access_token, stored_account_id, id_token = load_chatgpt_tokens()
+    # ``or`` only evaluates the fallback when no account id was loaded.
+    return access_token, stored_account_id or _derive_account_id(id_token)
+
+
+def sse_translate_chat(
+    upstream,
+    model: str,
+    created: int,
+    verbose: bool = False,
+    vlog=None,
+    reasoning_compat: str = "think-tags",
+    *,
+    include_usage: bool = False,
+):
+    response_id = "chatcmpl-stream"
+    compat = (reasoning_compat or "think-tags").strip().lower()
+    think_open = False
+    think_closed = False
+    saw_output = False
+    sent_stop_chunk = False
+    saw_any_summary = False
+    pending_summary_paragraph = False
+    upstream_usage = None
+    ws_state: dict[str, Any] = {}
+    ws_index: dict[str, int] = {}
+    ws_next_index: int = 0
+    
+    def _serialize_tool_args(eff_args: Any) -> str:
+        """
+        Serialize tool call arguments with proper JSON handling.
+        
+        Args:
+            eff_args: Arguments to serialize (dict, list, str, or other)
+            
+        Returns:
+            JSON string representation of the arguments
+        """
+        if isinstance(eff_args, (dict, list)):
+            return json.dumps(eff_args)
+        elif isinstance(eff_args, str):
+            try:
+                parsed = json.loads(eff_args)
+                if isinstance(parsed, (dict, list)):
+                    return json.dumps(parsed) 
+                else:
+                    return json.dumps({"query": eff_args})  
+            except (json.JSONDecodeError, ValueError):
+                return json.dumps({"query": eff_args})
+        else:
+            return "{}"
+    
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        try:
+            line_iterator = upstream.iter_lines(decode_unicode=False)
+        except requests.exceptions.ChunkedEncodingError as e:
+            if verbose and vlog:
+                vlog(f"Failed to start stream: {e}")
+            yield b"data: [DONE]\n\n"
+            return
+
+        for raw in line_iterator:
+            try:
+                if not raw:
+                    continue
+                line = (
+                    raw.decode("utf-8", errors="ignore")
+                    if isinstance(raw, (bytes, bytearray))
+                    else raw
+                )
+                if verbose and vlog:
+                    vlog(line)
+                if not line.startswith("data: "):
+                    continue
+                data = line[len("data: ") :].strip()
+                if not data:
+                    continue
+                if data == "[DONE]":
+                    break
+                try:
+                    evt = json.loads(data)
+                except (json.JSONDecodeError, UnicodeDecodeError):
+                    continue
+            except (
+                requests.exceptions.ChunkedEncodingError,
+                ConnectionError,
+                BrokenPipeError,
+            ) as e:
+                # Connection interrupted mid-stream - end gracefully
+                if verbose and vlog:
+                    vlog(f"Stream interrupted: {e}")
+                yield b"data: [DONE]\n\n"
+                return
+            kind = evt.get("type")
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+
+            if isinstance(kind, str) and ("web_search_call" in kind):
+                try:
+                    call_id = evt.get("item_id") or "ws_call"
+                    if verbose and vlog:
+                        try:
+                            vlog(f"CM_TOOLS {kind} id={call_id} -> tool_calls(web_search)")
+                        except Exception:
+                            pass
+                    item = evt.get('item') if isinstance(evt.get('item'), dict) else {}
+                    params_dict = ws_state.setdefault(call_id, {}) if isinstance(ws_state.get(call_id), dict) else {}
+                    def _merge_from(src):
+                        if not isinstance(src, dict):
+                            return
+                        for whole in ('parameters','args','arguments','input'):
+                            if isinstance(src.get(whole), dict):
+                                params_dict.update(src.get(whole))
+                        if isinstance(src.get('query'), str): params_dict.setdefault('query', src.get('query'))
+                        if isinstance(src.get('q'), str): params_dict.setdefault('query', src.get('q'))
+                        for rk in ('recency','time_range','days'):
+                            if src.get(rk) is not None and rk not in params_dict: params_dict[rk] = src.get(rk)
+                        for dk in ('domains','include_domains','include'):
+                            if isinstance(src.get(dk), list) and 'domains' not in params_dict: params_dict['domains'] = src.get(dk)
+                        for mk in ('max_results','topn','limit'):
+                            if src.get(mk) is not None and 'max_results' not in params_dict: params_dict['max_results'] = src.get(mk)
+                    _merge_from(item)
+                    _merge_from(evt if isinstance(evt, dict) else None)
+                    params = params_dict if params_dict else None
+                    if isinstance(params, dict):
+                        try:
+                            ws_state.setdefault(call_id, {}).update(params)
+                        except Exception:
+                            pass
+                    eff_params = ws_state.get(call_id, params if isinstance(params, (dict, list, str)) else {})
+                    args_str = _serialize_tool_args(eff_params)
+                    if call_id not in ws_index:
+                        ws_index[call_id] = ws_next_index
+                        ws_next_index += 1
+                    _idx = ws_index.get(call_id, 0)
+                    delta_chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {
+                                    "tool_calls": [
+                                        {
+                                            "index": _idx,
+                                            "id": call_id,
+                                            "type": "function",
+                                            "function": {"name": "web_search", "arguments": args_str},
+                                        }
+                                    ]
+                                },
+                                "finish_reason": None,
+                            }
+                        ],
+                    }
+                    yield f"data: {json.dumps(delta_chunk)}\n\n".encode("utf-8")
+                    if kind.endswith(".completed") or kind.endswith(".done"):
+                        finish_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
+                            ],
+                        }
+                        yield f"data: {json.dumps(finish_chunk)}\n\n".encode("utf-8")
+                except Exception:
+                    pass
+
+            if kind == "response.output_text.delta":
+                delta = evt.get("delta") or ""
+                if compat == "think-tags" and think_open and not think_closed:
+                    close_chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
+                    }
+                    yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8")
+                    think_open = False
+                    think_closed = True
+                saw_output = True
+                chunk = {
+                    "id": response_id,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and (item.get("type") == "function_call" or item.get("type") == "web_search_call"):
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ("web_search" if item.get("type") == "web_search_call" else "")
+                    raw_args = item.get("arguments") or item.get("parameters")
+                    if isinstance(raw_args, dict):
+                        try:
+                            ws_state.setdefault(call_id, {}).update(raw_args)
+                        except Exception:
+                            pass
+                    eff_args = ws_state.get(call_id, raw_args if isinstance(raw_args, (dict, list, str)) else {})
+                    try:
+                        args = _serialize_tool_args(eff_args)
+                    except Exception:
+                        args = "{}"
+                    if item.get("type") == "web_search_call" and verbose and vlog:
+                        try:
+                            vlog(f"CM_TOOLS response.output_item.done web_search_call id={call_id} has_args={bool(args)}")
+                        except Exception:
+                            pass
+                    if call_id not in ws_index:
+                        ws_index[call_id] = ws_next_index
+                        ws_next_index += 1
+                    _idx = ws_index.get(call_id, 0)
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        delta_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {
+                                        "tool_calls": [
+                                            {
+                                                "index": _idx,
+                                                "id": call_id,
+                                                "type": "function",
+                                                "function": {"name": name, "arguments": args},
+                                            }
+                                        ]
+                                    },
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {json.dumps(delta_chunk)}\n\n".encode("utf-8")
+
+                        finish_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {}, "finish_reason": "tool_calls"}],
+                        }
+                        yield f"data: {json.dumps(finish_chunk)}\n\n".encode("utf-8")
+            elif kind == "response.reasoning_summary_part.added":
+                if compat in ("think-tags", "o3"):
+                    if saw_any_summary:
+                        pending_summary_paragraph = True
+                    else:
+                        saw_any_summary = True
+            elif kind in ("response.reasoning_summary_text.delta", "response.reasoning_text.delta"):
+                delta_txt = evt.get("delta") or ""
+                if compat == "o3":
+                    if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                        nl_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {"reasoning": {"content": [{"type": "text", "text": "\n"}]}},
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {json.dumps(nl_chunk)}\n\n".encode("utf-8")
+                        pending_summary_paragraph = False
+                    chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {"reasoning": {"content": [{"type": "text", "text": delta_txt}]}},
+                                "finish_reason": None,
+                            }
+                        ],
+                    }
+                    yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                elif compat == "think-tags":
+                    if not think_open and not think_closed:
+                        open_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {"content": "<think>"}, "finish_reason": None}],
+                        }
+                        yield f"data: {json.dumps(open_chunk)}\n\n".encode("utf-8")
+                        think_open = True
+                    if think_open and not think_closed:
+                        if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                            nl_chunk = {
+                                "id": response_id,
+                                "object": "chat.completion.chunk",
+                                "created": created,
+                                "model": model,
+                                "choices": [{"index": 0, "delta": {"content": "\n"}, "finish_reason": None}],
+                            }
+                            yield f"data: {json.dumps(nl_chunk)}\n\n".encode("utf-8")
+                            pending_summary_paragraph = False
+                        content_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {"content": delta_txt}, "finish_reason": None}],
+                        }
+                        yield f"data: {json.dumps(content_chunk)}\n\n".encode("utf-8")
+                else:
+                    if kind == "response.reasoning_summary_text.delta":
+                        chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {"reasoning_summary": delta_txt, "reasoning": delta_txt},
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                    else:
+                        chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {"index": 0, "delta": {"reasoning": delta_txt}, "finish_reason": None}
+                            ],
+                        }
+                        yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif isinstance(kind, str) and kind.endswith(".done"):
+                pass
+            elif kind == "response.output_text.done":
+                chunk = {
+                    "id": response_id,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                sent_stop_chunk = True
+            elif kind == "response.failed":
+                err = evt.get("response", {}).get("error", {}).get("message", "response.failed")
+                chunk = {"error": {"message": err}}
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.completed":
+                m = _extract_usage(evt)
+                if m:
+                    upstream_usage = m
+                if compat == "think-tags" and think_open and not think_closed:
+                    close_chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
+                    }
+                    yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8")
+                    think_open = False
+                    think_closed = True
+                if not sent_stop_chunk:
+                    chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                    }
+                    yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                    sent_stop_chunk = True
+
+                if include_usage and upstream_usage:
+                    try:
+                        usage_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {}, "finish_reason": None}],
+                            "usage": upstream_usage,
+                        }
+                        yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
+                    except Exception:
+                        pass
+                yield b"data: [DONE]\n\n"
+                break
+    finally:
+        upstream.close()
+
+
+def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None, *, include_usage: bool = False):
+    response_id = "cmpl-stream"
+    upstream_usage = None
+    
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        for raw_line in upstream.iter_lines(decode_unicode=False):
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+            if verbose and vlog:
+                vlog(line)
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data or data == "[DONE]":
+                if data == "[DONE]":
+                    chunk = {
+                        "id": response_id,
+                        "object": "text_completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "text": "", "finish_reason": "stop"}],
+                    }
+                    yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                continue
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            kind = evt.get("type")
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            if kind == "response.output_text.delta":
+                delta_text = evt.get("delta") or ""
+                chunk = {
+                    "id": response_id,
+                    "object": "text_completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "text": delta_text, "finish_reason": None}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.output_text.done":
+                chunk = {
+                    "id": response_id,
+                    "object": "text_completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "text": "", "finish_reason": "stop"}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.completed":
+                m = _extract_usage(evt)
+                if m:
+                    upstream_usage = m
+                if include_usage and upstream_usage:
+                    try:
+                        usage_chunk = {
+                            "id": response_id,
+                            "object": "text_completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "text": "", "finish_reason": None}],
+                            "usage": upstream_usage,
+                        }
+                        yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
+                    except Exception:
+                        pass
+                yield b"data: [DONE]\n\n"
+                break
+    finally:
+        upstream.close()
diff --git a/chatmock/version.py b/chatmock/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..17d6d1aaa10aae5181838a3cfa812d120d51b45e
--- /dev/null
+++ b/chatmock/version.py
@@ -0,0 +1,4 @@
+from __future__ import annotations
+
+
+__version__ = "1.37"
diff --git a/chatmock/websocket_routes.py b/chatmock/websocket_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..37fcfe034d79667c4e7114d7380914fadc0f740c
--- /dev/null
+++ b/chatmock/websocket_routes.py
@@ -0,0 +1,225 @@
+from __future__ import annotations
+
+import json
+import os
+import ssl
+from typing import Any, Dict
+
+import certifi
+from flask import current_app, request
+from flask_sock import Sock
+from websockets.sync.client import connect as websocket_connect
+from websockets.exceptions import ConnectionClosed
+
+from .responses_api import (
+    ResponsesRequestError,
+    extract_client_session_id,
+    normalize_responses_payload,
+)
+from .session import (
+    clear_responses_reuse_state,
+    note_responses_stream_event,
+    prepare_responses_request_for_session,
+)
+from .upstream import build_upstream_headers, build_upstream_websocket_url
+from .utils import get_effective_chatgpt_auth
+
+
+def _log_json(prefix: str, payload: Any) -> None:
+    try:
+        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+    except Exception:
+        try:
+            print(f"{prefix}\n{payload}")
+        except Exception:
+            pass
+
+
+def _error_event(message: str, *, status_code: int = 400, code: str | None = None) -> Dict[str, Any]:
+    error: Dict[str, Any] = {"message": message}
+    if code:
+        error["code"] = code
+    return {"type": "error", "status_code": status_code, "error": error}
+
+
+def _is_terminal_event(event: Any) -> bool:
+    if not isinstance(event, dict):
+        return False
+    kind = event.get("type")
+    return kind in ("response.completed", "response.failed", "error")
+
+
+def _build_websocket_ssl_context() -> ssl.SSLContext:
+    cafile = (
+        os.getenv("CODEX_CA_CERTIFICATE")
+        or os.getenv("SSL_CERT_FILE")
+        or certifi.where()
+    )
+    return ssl.create_default_context(cafile=cafile)
+
+
+def connect_upstream_websocket(url: str, headers: Dict[str, str]):
+    return websocket_connect(
+        url,
+        additional_headers=headers,
+        open_timeout=15,
+        ssl=_build_websocket_ssl_context(),
+    )
+
+
+def register_websocket_routes(sock: Sock) -> None:
+    @sock.route("/v1/responses")
+    def responses_websocket(ws) -> None:
+        verbose = bool(current_app.config.get("VERBOSE"))
+        upstream_ws = None
+        upstream_session_id: str | None = None
+        active_session_id: str | None = None
+
+        def _send_error(message: str, *, status_code: int = 400, code: str | None = None) -> None:
+            evt = _error_event(message, status_code=status_code, code=code)
+            if verbose:
+                _log_json("STREAM OUT WS /v1/responses (error)", evt)
+            try:
+                ws.send(json.dumps(evt))
+            except Exception:
+                pass
+
+        try:
+            while True:
+                incoming = ws.receive()
+                if incoming is None:
+                    break
+
+                if isinstance(incoming, bytes):
+                    incoming_text = incoming.decode("utf-8", errors="ignore")
+                else:
+                    incoming_text = str(incoming)
+                if verbose:
+                    print("IN WS /v1/responses\n" + incoming_text)
+
+                try:
+                    payload = json.loads(incoming_text)
+                except Exception:
+                    _send_error("Websocket frames must be valid JSON objects.", status_code=400)
+                    break
+
+                if not isinstance(payload, dict):
+                    _send_error("Websocket frames must be JSON objects.", status_code=400)
+                    break
+
+                client_session_id = extract_client_session_id(request.headers)
+                outbound_text = incoming_text
+                session_id = upstream_session_id
+
+                if payload.get("type") == "response.create":
+                    try:
+                        normalized = normalize_responses_payload(
+                            payload,
+                            config=current_app.config,
+                            client_session_id=client_session_id,
+                        )
+                    except ResponsesRequestError as exc:
+                        _send_error(str(exc), status_code=exc.status_code, code=exc.code)
+                        continue
+
+                    if normalized.service_tier_resolution.warning_message and verbose:
+                        print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+                    prepared = prepare_responses_request_for_session(
+                        normalized.session_id,
+                        normalized.payload,
+                        allow_previous_response_id=True,
+                    )
+                    outbound_text = json.dumps(prepared.payload)
+                    session_id = normalized.session_id
+                    active_session_id = normalized.session_id
+                    if verbose:
+                        _log_json("OUTBOUND >> ChatGPT Responses WS payload", prepared.payload)
+                elif upstream_ws is None:
+                    _send_error(
+                        "The first websocket message must be a response.create request.",
+                        status_code=400,
+                    )
+                    break
+
+                if upstream_ws is None or (session_id and session_id != upstream_session_id):
+                    access_token, account_id = get_effective_chatgpt_auth()
+                    if not access_token or not account_id:
+                        if session_id:
+                            clear_responses_reuse_state(session_id)
+                        _send_error(
+                            "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+                            status_code=401,
+                        )
+                        break
+
+                    if upstream_ws is not None:
+                        try:
+                            upstream_ws.close()
+                        except Exception:
+                            pass
+
+                    effective_session_id = session_id or client_session_id or ""
+                    try:
+                        upstream_ws = connect_upstream_websocket(
+                            build_upstream_websocket_url(),
+                            build_upstream_headers(
+                                access_token,
+                                account_id,
+                                effective_session_id,
+                                accept="application/json",
+                            ),
+                        )
+                    except Exception as exc:
+                        if session_id:
+                            clear_responses_reuse_state(session_id)
+                        _send_error(
+                            f"Upstream websocket connection failed: {exc}",
+                            status_code=502,
+                        )
+                        break
+                    upstream_session_id = effective_session_id
+
+                upstream_ws.send(outbound_text)
+
+                while True:
+                    try:
+                        upstream_message = upstream_ws.recv()
+                    except ConnectionClosed:
+                        if active_session_id:
+                            clear_responses_reuse_state(active_session_id)
+                        _send_error("Upstream websocket closed unexpectedly.", status_code=502)
+                        return
+                    if upstream_message is None:
+                        if active_session_id:
+                            clear_responses_reuse_state(active_session_id)
+                        _send_error("Upstream websocket closed unexpectedly.", status_code=502)
+                        return
+                    if verbose:
+                        try:
+                            print("STREAM OUT WS /v1/responses\n" + str(upstream_message))
+                        except Exception:
+                            pass
+                    ws.send(upstream_message)
+
+                    try:
+                        parsed = json.loads(upstream_message)
+                    except Exception:
+                        parsed = None
+                    if isinstance(parsed, dict) and active_session_id:
+                        note_responses_stream_event(active_session_id, parsed)
+                    if _is_terminal_event(parsed):
+                        if isinstance(parsed, dict) and parsed.get("type") in ("response.failed", "error"):
+                            if upstream_ws is not None:
+                                try:
+                                    upstream_ws.close()
+                                except Exception:
+                                    pass
+                            upstream_ws = None
+                            upstream_session_id = None
+                        break
+        finally:
+            if upstream_ws is not None:
+                try:
+                    upstream_ws.close()
+                except Exception:
+                    pass
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000000000000000000000000000000000000..0b155e8c4f499d019afe67d2081e087413d79eb3
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,34 @@
+services:
+  chatmock:
+    image: ${CHATMOCK_IMAGE:-storagetime/chatmock:latest}
+    container_name: chatmock
+    command: ["serve"]
+    env_file: .env
+    environment:
+      - CHATGPT_LOCAL_HOME=/data
+      # docker/entrypoint.sh falls back to port 7860 when PORT is unset, so pin the
+      # listen port here to keep it aligned with the published port and healthcheck.
+      - PORT=${PORT:-8000}
+    ports:
+      - "${PORT:-8000}:${PORT:-8000}"
+    volumes:
+      - chatmock_data:/data
+      - ./prompt.md:/app/prompt.md:ro
+    healthcheck:
+      test: ["CMD-SHELL", "python -c \"import urllib.request,sys; sys.exit(0 if urllib.request.urlopen('http://127.0.0.1:${PORT:-8000}/health').status==200 else 1)\" "]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 5s
+
+  # One-off OAuth login helper; run with the "login" profile enabled.
+  chatmock-login:
+    image: ${CHATMOCK_IMAGE:-storagetime/chatmock:latest}
+    profiles: ["login"]
+    command: ["login"]
+    environment:
+      - CHATGPT_LOCAL_HOME=/data
+      - CHATGPT_LOCAL_LOGIN_BIND=0.0.0.0
+    volumes:
+      - chatmock_data:/data
+    ports:
+      - "1455:1455"
+
+volumes:
+  chatmock_data:
diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
new file mode 100644
index 0000000000000000000000000000000000000000..48179dd8cecce73acf3ae9b468db7aae9d9ae97e
--- /dev/null
+++ b/docker/entrypoint.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+# Container entrypoint: dispatches to `chatmock serve`, `chatmock login`, or an
+# arbitrary command, translating environment variables into CLI flags.
+set -euo pipefail
+
+export CHATGPT_LOCAL_HOME="${CHATGPT_LOCAL_HOME:-/data}"
+
+# If AUTH_JSON is provided as an environment variable, write it to auth.json
+if [[ -n "${AUTH_JSON:-}" ]]; then
+  echo "Setting up auth from AUTH_JSON environment variable..."
+  mkdir -p "$CHATGPT_LOCAL_HOME"
+  # Create the file with a restrictive umask so the credentials are never
+  # readable by other users, not even briefly before the chmod below.
+  (
+    umask 077
+    printf '%s\n' "$AUTH_JSON" > "$CHATGPT_LOCAL_HOME/auth.json"
+  )
+  # Still needed when auth.json already existed with looser permissions.
+  chmod 600 "$CHATGPT_LOCAL_HOME/auth.json"
+fi
+
+cmd="${1:-serve}"
+shift || true
+
+# bool VALUE: succeeds when VALUE is a recognised "true" spelling.
+bool() {
+  case "${1:-}" in
+    1|true|TRUE|True|yes|YES|Yes|on|ON|On) return 0;;
+    *) return 1;;
+  esac
+}
+
+if [[ "$cmd" == "serve" ]]; then
+  # Hugging Face Spaces expects port 7860
+  PORT="${PORT:-7860}"
+  ARGS=(serve --host 0.0.0.0 --port "${PORT}")
+
+  if bool "${VERBOSE:-}" || bool "${CHATGPT_LOCAL_VERBOSE:-}"; then
+    ARGS+=(--verbose)
+  fi
+  if bool "${VERBOSE_OBFUSCATION:-}" || bool "${CHATGPT_LOCAL_VERBOSE_OBFUSCATION:-}"; then
+    ARGS+=(--verbose-obfuscation)
+  fi
+  if bool "${FAST_MODE:-}" || bool "${CHATGPT_LOCAL_FAST_MODE:-}"; then
+    ARGS+=(--fast-mode)
+  fi
+
+  # Extra arguments after "serve" are forwarded to the CLI unchanged.
+  if [[ "$#" -gt 0 ]]; then
+    ARGS+=("$@")
+  fi
+
+  exec chatmock "${ARGS[@]}"
+elif [[ "$cmd" == "login" ]]; then
+  ARGS=(login --no-browser)
+  if bool "${VERBOSE:-}" || bool "${CHATGPT_LOCAL_VERBOSE:-}"; then
+    ARGS+=(--verbose)
+  fi
+
+  # Forward extra arguments, matching the behaviour of the serve branch.
+  if [[ "$#" -gt 0 ]]; then
+    ARGS+=("$@")
+  fi
+
+  exec chatmock "${ARGS[@]}"
+else
+  # Anything else is run as-is (e.g. a debugging shell).
+  exec "$cmd" "$@"
+fi
diff --git a/gui.py b/gui.py
new file mode 100644
index 0000000000000000000000000000000000000000..5bdc18c78ccfb4a6e2b5c998fcc46e25010838a8
--- /dev/null
+++ b/gui.py
@@ -0,0 +1,579 @@
+from __future__ import annotations
+
+import sys
+import os
+import webbrowser
+import multiprocessing as mp
+
+from PySide6 import QtCore, QtGui, QtWidgets
+
+from chatmock.app import create_app
+from chatmock.cli import cmd_login
+from chatmock.utils import load_chatgpt_tokens, parse_jwt_claims
+
+
+def run_server(
+    host: str,
+    port: int,
+    reasoning_effort: str = "medium",
+    reasoning_summary: str = "auto",
+    reasoning_compat: str = "think-tags",
+    fast_mode: bool = False,
+    debug_model: str | None = None,
+    expose_reasoning_models: bool = False,
+    default_web_search: bool = False,
+) -> None:
+    app = create_app(
+        reasoning_effort=reasoning_effort,
+        reasoning_summary=reasoning_summary,
+        reasoning_compat=reasoning_compat,
+        fast_mode=fast_mode,
+        debug_model=debug_model,
+        expose_reasoning_models=expose_reasoning_models,
+        default_web_search=default_web_search,
+    )
+    app.run(host=host, port=port, use_reloader=False, threaded=True)
+
+
+class ServerProcess(QtCore.QObject):
+    state_changed = QtCore.Signal(bool)
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._proc: QtCore.QProcess | None = None
+        self._host = "127.0.0.1"
+        self._port = 8000
+        self._effort = "medium"
+        self._summary = "auto"
+        self._compat = "think-tags"
+        self._fast_mode = False
+        self._debug_model: str | None = None
+        self._expose_reasoning_models = False
+        self._default_web_search = False
+
+    def is_running(self) -> bool:
+        return self._proc is not None and self._proc.state() != QtCore.QProcess.NotRunning
+
+    def start(
+        self,
+        host: str,
+        port: int,
+        effort: str,
+        summary: str,
+        compat: str,
+        fast_mode: bool,
+        debug_model: str | None,
+        expose_reasoning_models: bool,
+        default_web_search: bool,
+    ) -> None:
+        if self.is_running():
+            return
+        self._host, self._port = host, port
+        self._effort, self._summary = effort, summary
+        self._compat = compat
+        self._fast_mode = fast_mode
+        self._debug_model = debug_model
+        self._expose_reasoning_models = expose_reasoning_models
+        self._default_web_search = default_web_search
+        self._proc = QtCore.QProcess()
+        self._proc.setProcessChannelMode(QtCore.QProcess.MergedChannels)
+        args = [
+            "--run-server",
+            "--host", host,
+            "--port", str(port),
+            "--effort", effort,
+            "--summary", summary,
+            "--compat", compat,
+        ]
+        if isinstance(debug_model, str) and debug_model.strip():
+            args.extend(["--debug-model", debug_model.strip()])
+        if fast_mode:
+            args.append("--fast-mode")
+        if expose_reasoning_models:
+            args.append("--expose-reasoning-models")
+        if default_web_search:
+            args.append("--enable-web-search")
+        self._proc.start(sys.executable, args)
+        self._proc.started.connect(lambda: self.state_changed.emit(True))
+
+        def _on_finished(code: int, status: QtCore.QProcess.ExitStatus) -> None:
+            self.state_changed.emit(False)
+            self._proc = None
+
+        self._proc.finished.connect(_on_finished)
+
+    def stop(self) -> None:
+        if not self.is_running():
+            return
+        try:
+            self._proc.kill()
+            self._proc.waitForFinished(3000)
+        except Exception:
+            pass
+        self._proc = None
+        self.state_changed.emit(False)
+
+    def base_url(self) -> str:
+        return f"http://{self._host}:{self._port}/v1"
+
+
+def resource_path(rel: str) -> str:
+    base = getattr(sys, "_MEIPASS", os.path.abspath(os.path.dirname(__file__)))
+    return os.path.join(base, rel)
+
+
+def find_app_icon() -> QtGui.QIcon:
+    candidates = [
+        "appicon.icns",
+        "appicon.ico",
+        "appicon.png",
+        "icon.icns",
+        "icon.ico",
+        "icon.png",
+        "ChatMock.icns",
+        "ChatMock.png",
+    ]
+    for name in candidates:
+        p = resource_path(name)
+        if os.path.exists(p):
+            icon = QtGui.QIcon(p)
+            if not icon.isNull():
+                return icon
+    return QtWidgets.QApplication.style().standardIcon(QtWidgets.QStyle.SP_DesktopIcon)
+
+
+def is_dark_mode() -> bool:
+    app = QtWidgets.QApplication.instance()
+    pal = app.palette() if app else QtGui.QPalette()
+    bg = pal.window().color()
+    return bg.lightness() < 128
+
+
+def apply_theme() -> None:
+    dark = is_dark_mode()
+    if dark:
+        bg = "#111827"  # slate-900
+        text = "#e5e7eb"  # gray-200
+        subtext = "#9ca3af"  # gray-400
+        border = "#374151"  # slate-700
+        primary = "#3b82f6"  # blue-500
+        primary_hover = "#2563eb"
+        danger = "#ef4444"  # red-500
+        field_bg = "#0f172a"  # slightly lighter (inputs)
+    else:
+        bg = "#ffffff"
+        text = "#0f172a"
+        subtext = "#64748b"
+        border = "#e5e7eb"
+        primary = "#2563eb"
+        primary_hover = "#1d4ed8"
+        danger = "#ef4444"
+        field_bg = "#ffffff"
+
+    css = f"""
+    QWidget {{ background: {bg}; color: {text}; }}
+    QGroupBox {{
+        background: {bg};
+        border: 1px solid {border};
+        border-radius: 10px;
+        padding: 12px;
+        margin-top: 8px;
+    }}
+    QGroupBox::title {{
+        subcontrol-origin: margin;
+        subcontrol-position: top left;
+        padding: 2px 6px;
+        color: {text};
+        font-weight: 600;
+        background: transparent;
+    }}
+    QLabel#subtitle {{ color: {subtext}; }}
+    QLabel {{ background: transparent; }}
+    QLineEdit, QComboBox {{
+        background: {field_bg};
+        border: 1px solid {border};
+        border-radius: 6px;
+        padding: 6px 8px;
+    }}
+    QPushButton {{
+        border: 1px solid {border};
+        border-radius: 6px;
+        padding: 6px 12px;
+        background: {bg};
+        color: {text};
+    }}
+    QPushButton:hover {{
+        border-color: {primary};
+    }}
+    QPushButton[muted="true"] {{
+        background: transparent;
+        color: {subtext};
+        border-color: {border};
+    }}
+    QPushButton[muted="true"]:hover {{
+        border-color: {primary};
+        color: {text};
+    }}
+    QPushButton[primary="true"] {{
+        background: {primary};
+        color: #ffffff;
+        border: 1px solid {primary};
+    }}
+    QPushButton[primary="true"]:hover {{
+        background: {primary_hover};
+        border-color: {primary_hover};
+    }}
+    QPushButton[danger="true"] {{
+        background: transparent;
+        color: {danger};
+        border: 1px solid {danger};
+    }}
+    QPushButton[danger="true"]:hover {{
+        background: {danger};
+        color: #ffffff;
+    }}
+    QMenu {{
+        background: {bg};
+        border: 1px solid {border};
+    }}
+    QMenu::item:selected {{ background: {primary}; color: #ffffff; }}
+    """
+
+    app = QtWidgets.QApplication.instance()
+    if app:
+        app.setStyleSheet(css)
+
+
+class LoginWorker(QtCore.QThread):
+    finished_with_code = QtCore.Signal(int)
+
+    def run(self) -> None:
+        try:
+            code = cmd_login(no_browser=False, verbose=False)
+        except Exception:
+            code = 1
+        self.finished_with_code.emit(code)
+
+
+class MainWindow(QtWidgets.QMainWindow):
+    def __init__(self) -> None:
+        super().__init__()
+        self.setWindowTitle("ChatMock")
+        self.setMinimumSize(620, 420)
+        self._logged_in = False
+        self._server = ServerProcess()
+        self._server.state_changed.connect(self._on_server_state_changed)
+
+        # Central widget
+        cw = QtWidgets.QWidget()
+        self.setCentralWidget(cw)
+        root = QtWidgets.QVBoxLayout(cw)
+        root.setContentsMargins(16, 16, 16, 12)
+        root.setSpacing(12)
+
+        # Header
+        header = QtWidgets.QVBoxLayout()
+        self.title = QtWidgets.QLabel("ChatMock")
+        font = self.title.font()
+        font.setPointSize(20)
+        font.setBold(True)
+        self.title.setFont(font)
+        self.status = QtWidgets.QLabel("Welcome to ChatMock")
+        self.status.setObjectName("subtitle")
+        header.addWidget(self.title)
+        header.addWidget(self.status)
+        root.addLayout(header)
+
+        # Account card
+        acc_box = QtWidgets.QGroupBox("Account")
+        acc_box.setStyleSheet("QGroupBox{font-weight:600;}")
+        acc_layout = QtWidgets.QFormLayout(acc_box)
+        acc_layout.setLabelAlignment(QtCore.Qt.AlignLeft)
+        acc_layout.setFormAlignment(QtCore.Qt.AlignLeft | QtCore.Qt.AlignTop)
+        acc_layout.setFieldGrowthPolicy(QtWidgets.QFormLayout.AllNonFixedFieldsGrow)
+        self.email_value = QtWidgets.QLabel("Not signed in")
+        self.email_value.setWordWrap(True)
+        self.plan_value = QtWidgets.QLabel("-")
+        self.accid_value = QtWidgets.QLabel("-")
+        self.accid_value.setWordWrap(True)
+        acc_layout.addRow("Email", self.email_value)
+        acc_layout.addRow("Plan", self.plan_value)
+        acc_layout.addRow("Account ID", self.accid_value)
+        acc_btns = QtWidgets.QHBoxLayout()
+        self.btn_login = QtWidgets.QPushButton("Log in")
+        self.btn_login.clicked.connect(self._on_login)
+        acc_btns.addWidget(self.btn_login)
+        acc_btns.addStretch(1)
+        acc_layout.addRow(acc_btns)
+        root.addWidget(acc_box)
+
+        # Server card
+        srv_box = QtWidgets.QGroupBox("Server")
+        srv_layout = QtWidgets.QVBoxLayout(srv_box)
+        form = QtWidgets.QGridLayout()
+        form.setHorizontalSpacing(12)
+        form.setVerticalSpacing(8)
+        form.addWidget(QtWidgets.QLabel("Host"), 0, 0)
+        self.host_edit = QtWidgets.QLineEdit("127.0.0.1")
+        self.host_edit.setClearButtonEnabled(True)
+        self.host_edit.setMinimumWidth(220)
+        self.host_edit.setSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed)
+        form.addWidget(self.host_edit, 0, 1)
+        form.addWidget(QtWidgets.QLabel("Port"), 0, 2)
+        self.port_edit = QtWidgets.QLineEdit("8000")
+        self.port_edit.setValidator(QtGui.QIntValidator(1, 65535, self))
+        self.port_edit.setMaximumWidth(100)
+        form.addWidget(self.port_edit, 0, 3)
+        form.addWidget(QtWidgets.QLabel("Debug Model"), 1, 0)
+        self.debug_model_edit = QtWidgets.QLineEdit("")
+        self.debug_model_edit.setClearButtonEnabled(True)
+        self.debug_model_edit.setPlaceholderText("Optional override, e.g. gpt-5.4")
+        self.debug_model_edit.setSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Fixed)
+        form.addWidget(self.debug_model_edit, 1, 1, 1, 3)
+        form.setColumnStretch(1, 1)
+        srv_layout.addLayout(form)
+
+        actions = QtWidgets.QHBoxLayout()
+        self.btn_start = QtWidgets.QPushButton("Start in Background")
+        self.btn_start.setDefault(True)
+        self.btn_start.setProperty("primary", True)
+        self.btn_stop = QtWidgets.QPushButton("Stop")
+        self.btn_stop.setProperty("danger", True)
+        self.btn_open = QtWidgets.QPushButton("Open Base URL")
+        actions.addWidget(self.btn_start)
+        actions.addWidget(self.btn_stop)
+        actions.addWidget(self.btn_open)
+        actions.addStretch(1)
+        srv_layout.addLayout(actions)
+
+        # Reasoning controls
+        opts = QtWidgets.QGridLayout()
+        opts.setHorizontalSpacing(12)
+        opts.setVerticalSpacing(8)
+        opts.addWidget(QtWidgets.QLabel("Reasoning Effort"), 0, 0)
+        self.effort = QtWidgets.QComboBox()
+        self.effort.addItems(["none", "minimal", "low", "medium", "high", "xhigh"])
+        self.effort.setCurrentText("medium")
+        self.effort.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContents)
+        self.effort.setMinimumContentsLength(7)
+        opts.addWidget(self.effort, 0, 1)
+        opts.addWidget(QtWidgets.QLabel("Reasoning Summary"), 0, 2)
+        self.summary = QtWidgets.QComboBox()
+        self.summary.addItems(["auto", "concise", "detailed", "none"])
+        self.summary.setCurrentText("auto")
+        self.summary.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContents)
+        self.summary.setMinimumContentsLength(8)
+        opts.addWidget(self.summary, 0, 3)
+        opts.addWidget(QtWidgets.QLabel("Reasoning Compat"), 1, 0)
+        self.compat = QtWidgets.QComboBox()
+        self.compat.addItems(["think-tags", "legacy", "o3", "current"])
+        self.compat.setCurrentText("think-tags")
+        self.compat.setSizeAdjustPolicy(QtWidgets.QComboBox.AdjustToContents)
+        opts.addWidget(self.compat, 1, 1)
+        self.expose_reasoning_models = QtWidgets.QCheckBox("Expose reasoning models")
+        opts.addWidget(self.expose_reasoning_models, 1, 2)
+        self.fast_mode = QtWidgets.QCheckBox("Enable fast mode")
+        opts.addWidget(self.fast_mode, 1, 3)
+        self.enable_web_search = QtWidgets.QCheckBox("Enable web search")
+        opts.addWidget(self.enable_web_search, 2, 0)
+        opts.setColumnStretch(1, 1)
+        opts.setColumnStretch(3, 1)
+        srv_layout.addLayout(opts)
+
+        url_row = QtWidgets.QHBoxLayout()
+        url_row.addWidget(QtWidgets.QLabel("Base URL:"))
+        self.baseurl = QtWidgets.QLabel("(server not running)")
+        self.baseurl.setTextInteractionFlags(
+            QtCore.Qt.TextSelectableByMouse | QtCore.Qt.TextSelectableByKeyboard
+        )
+        self.baseurl.setSizePolicy(QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Preferred)
+        url_row.addWidget(self.baseurl, 1)
+        self.btn_copy = QtWidgets.QPushButton("Copy")
+        url_row.addWidget(self.btn_copy)
+        srv_layout.addLayout(url_row)
+        root.addWidget(srv_box)
+
+        self.btn_start.clicked.connect(self._start_server)
+        self.btn_stop.clicked.connect(self._stop_server)
+        self.btn_copy.clicked.connect(self._copy_url)
+        self.btn_open.clicked.connect(self._open_base_url)
+
+        # Tray
+        self.tray = QtWidgets.QSystemTrayIcon(self)
+        icon = find_app_icon()
+        self.setWindowIcon(icon)
+        self.tray.setIcon(icon)
+        tray_menu = QtWidgets.QMenu()
+        act_show = tray_menu.addAction("Show Window")
+        tray_menu.addSeparator()
+        act_quit = tray_menu.addAction("Quit")
+        act_show.triggered.connect(self._show_window)
+        act_quit.triggered.connect(QtWidgets.QApplication.quit)
+        self.tray.setContextMenu(tray_menu)
+        self.tray.show()
+
+        self._refresh_login_state()
+        self._on_server_state_changed(False)
+
+        QtWidgets.QApplication.instance().aboutToQuit.connect(self._server.stop)
+
+    def _refresh_login_state(self) -> None:
+        access_token, account_id, id_token = load_chatgpt_tokens()
+        if access_token and id_token:
+            self.status.setText("Signed in • Ready to serve")
+            self._logged_in = True
+            self.btn_login.setEnabled(True)
+            self.btn_login.setProperty("muted", True)
+            try:
+                self.btn_login.style().unpolish(self.btn_login)
+                self.btn_login.style().polish(self.btn_login)
+            except Exception:
+                pass
+            self.btn_login.setToolTip("You are logged in. Click to re-authenticate.")
+            id_claims = parse_jwt_claims(id_token) or {}
+            access_claims = parse_jwt_claims(access_token) or {}
+            email = id_claims.get("email") or id_claims.get("preferred_username") or "<unknown>"
+            plan_raw = (access_claims.get("https://api.openai.com/auth") or {}).get("chatgpt_plan_type") or "unknown"
+            plan_map = {"plus": "Plus", "pro": "Pro", "free": "Free", "team": "Team", "enterprise": "Enterprise"}
+            plan = plan_map.get(
+                str(plan_raw).lower(), str(plan_raw).title() if isinstance(plan_raw, str) else "Unknown"
+            )
+            self.email_value.setText(email)
+            self.plan_value.setText(plan)
+            self.accid_value.setText(account_id or "-")
+        else:
+            self.status.setText("Not signed in • Click Log in")
+            self._logged_in = False
+            self.btn_login.setEnabled(True)
+            self.btn_login.setProperty("muted", False)
+            try:
+                self.btn_login.style().unpolish(self.btn_login)
+                self.btn_login.style().polish(self.btn_login)
+            except Exception:
+                pass
+            self.btn_login.setToolTip("Log in to ChatGPT")
+            self.email_value.setText("Not signed in")
+            self.plan_value.setText("-")
+            self.accid_value.setText("-")
+        self.btn_start.setEnabled(not self._server.is_running() and self._logged_in)
+
+    def _on_login(self) -> None:
+        self.status.setText("Launching login flow…")
+        self.btn_login.setEnabled(False)
+        self._login_worker = LoginWorker()
+        self._login_worker.finished_with_code.connect(self._after_login)
+        self._login_worker.start()
+
+    def _after_login(self, code: int) -> None:
+        if code == 0:
+            QtWidgets.QMessageBox.information(self, "Login", "Login successful. You can now start the server.")
+        elif code == 13:
+            QtWidgets.QMessageBox.warning(
+                self, "Login", "Login helper port is in use. Close other instances and try again."
+            )
+        else:
+            QtWidgets.QMessageBox.critical(self, "Login", "Login failed. Please try again.")
+        self._refresh_login_state()
+
+    def _start_server(self) -> None:
+        try:
+            host = self.host_edit.text().strip() or "127.0.0.1"
+            port = int(self.port_edit.text().strip() or "8000")
+        except ValueError:
+            QtWidgets.QMessageBox.critical(self, "Port", "Invalid port number.")
+            return
+        effort = self.effort.currentText().strip()
+        summary = self.summary.currentText().strip()
+        compat = self.compat.currentText().strip()
+        fast_mode = self.fast_mode.isChecked()
+        debug_model = self.debug_model_edit.text().strip() or None
+        expose_reasoning_models = self.expose_reasoning_models.isChecked()
+        default_web_search = self.enable_web_search.isChecked()
+        self.status.setText(f"Starting server at http://{host}:{port} …")
+        self.btn_start.setEnabled(False)
+        self._server.start(
+            host,
+            port,
+            effort,
+            summary,
+            compat,
+            fast_mode,
+            debug_model,
+            expose_reasoning_models,
+            default_web_search,
+        )
+
+    def _stop_server(self) -> None:
+        self._server.stop()
+
+    def _on_server_state_changed(self, running: bool) -> None:
+        self.btn_start.setEnabled((not running) and self._logged_in)
+        self.btn_stop.setEnabled(running)
+        self.btn_open.setEnabled(running)
+        self.btn_copy.setEnabled(running)
+        if running:
+            self.status.setText("Serving • Running in background")
+            self.baseurl.setText(self._server.base_url())
+            self.hide()
+            self.tray.showMessage(
+                "ChatMock", "Server is running in the background", QtWidgets.QSystemTrayIcon.Information, 1500
+            )
+        else:
+            self.status.setText("Server stopped")
+            self.baseurl.setText("(server not running)")
+
+    def _copy_url(self) -> None:
+        url = self.baseurl.text().strip()
+        if url and not url.startswith("("):
+            QtWidgets.QApplication.clipboard().setText(url)
+
+    def _open_base_url(self) -> None:
+        url = self.baseurl.text().strip()
+        if url and not url.startswith("("):
+            webbrowser.open(url)
+
+    def _show_window(self) -> None:
+        self.show()
+        self.raise_()
+        self.activateWindow()
+
+
+def main() -> None:
+    mp.freeze_support()
+    if "--run-server" in sys.argv:
+        import argparse
+
+        p = argparse.ArgumentParser(add_help=False)
+        p.add_argument("--run-server", action="store_true")
+        p.add_argument("--host", default="127.0.0.1")
+        p.add_argument("--port", type=int, default=8000)
+        p.add_argument("--effort", default="medium")
+        p.add_argument("--summary", default="auto")
+        p.add_argument("--compat", default="think-tags")
+        p.add_argument("--fast-mode", action="store_true")
+        p.add_argument("--debug-model")
+        p.add_argument("--expose-reasoning-models", action="store_true")
+        p.add_argument("--enable-web-search", action="store_true")
+        args, _ = p.parse_known_args()
+        run_server(
+            args.host,
+            args.port,
+            args.effort,
+            args.summary,
+            args.compat,
+            args.fast_mode,
+            args.debug_model,
+            args.expose_reasoning_models,
+            args.enable_web_search,
+        )
+        return
+
+    app = QtWidgets.QApplication(sys.argv)
+    apply_theme()
+    w = MainWindow()
+    w.show()
+    sys.exit(app.exec())
+
+
+# Run main() only when this file is executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
diff --git a/icon.png b/icon.png
new file mode 100644
index 0000000000000000000000000000000000000000..ca5e086ca8a0d0669a6db7ac44e7547994c3f159
Binary files /dev/null and b/icon.png differ
diff --git a/prompt.md b/prompt.md
new file mode 100644
index 0000000000000000000000000000000000000000..4711dd749af12aaf87cc50abf4db11287cece8c7
--- /dev/null
+++ b/prompt.md
@@ -0,0 +1,326 @@
+You are a coding agent running in the Codex CLI, a terminal-based coding assistant. Codex CLI is an open source project led by OpenAI. You are expected to be precise, safe, and helpful.
+
+Your capabilities:
+- Receive user prompts and other context provided by the harness, such as files in the workspace.
+- Communicate with the user by streaming thinking & responses, and by making & updating plans.
+- Emit function calls to run terminal commands and apply patches. Depending on how this specific run is configured, you can request that these function calls be escalated to the user for approval before running. More on this in the "Sandbox and approvals" section.
+
+Within this context, Codex refers to the open-source agentic coding interface (not the old Codex language model built by OpenAI).
+
+# How you work
+
+## Personality
+
+Your default personality and tone is concise, direct, and friendly. You communicate efficiently, always keeping the user clearly informed about ongoing actions without unnecessary detail. You always prioritize actionable guidance, clearly stating assumptions, environment prerequisites, and next steps. Unless explicitly asked, you avoid excessively verbose explanations about your work.
+
+## Responsiveness
+
+### Preamble messages
+
+Before making tool calls, send a brief preamble to the user explaining what you’re about to do. When sending preamble messages, follow these principles and examples:
+
+- **Logically group related actions**: if you’re about to run several related commands, describe them together in one preamble rather than sending a separate note for each.
+- **Keep it concise**: be no more than 1-2 sentences (8–12 words for quick updates).
+- **Build on prior context**: if this is not your first tool call, use the preamble message to connect the dots with what’s been done so far and create a sense of momentum and clarity for the user to understand your next actions.
+- **Keep your tone light, friendly and curious**: add small touches of personality so preambles feel collaborative and engaging.
+
+**Examples:**
+- “I’ve explored the repo; now checking the API route definitions.”
+- “Next, I’ll patch the config and update the related tests.”
+- “I’m about to scaffold the CLI commands and helper functions.”
+- “Ok cool, so I’ve wrapped my head around the repo. Now digging into the API routes.”
+- “Config’s looking tidy. Next up is patching helpers to keep things in sync.”
+- “Finished poking at the DB gateway. I will now chase down error handling.”
+- “Alright, build pipeline order is interesting. Checking how it reports failures.”
+- “Spotted a clever caching util; now hunting where it gets used.”
+
+**Avoid:**
+- Adding a preamble for every trivial read (e.g., `cat` a single file) unless it’s part of a larger grouped action.
+- Jumping straight into tool calls without explaining what’s about to happen.
+- Writing overly long or speculative preambles — focus on immediate, tangible next steps.
+
+## Planning
+
+You have access to an `update_plan` tool which tracks steps and progress and renders them to the user. Using the tool helps demonstrate that you've understood the task and convey how you're approaching it. Plans can help to make complex, ambiguous, or multi-phase work clearer and more collaborative for the user. A good plan should break the task into meaningful, logically ordered steps that are easy to verify as you go. Note that plans are not for padding out simple work with filler steps or stating the obvious. Do not repeat the full contents of the plan after an `update_plan` call — the harness already displays it. Instead, summarize the change made and highlight any important context or next step.
+
+Use a plan when:
+- The task is non-trivial and will require multiple actions over a long time horizon.
+- There are logical phases or dependencies where sequencing matters.
+- The work has ambiguity that benefits from outlining high-level goals.
+- You want intermediate checkpoints for feedback and validation.
+- The user asked you to do more than one thing in a single prompt.
+- The user has asked you to use the plan tool (aka "TODOs")
+- You generate additional steps while working, and plan to do them before yielding to the user
+
+Skip a plan when:
+- The task is simple and direct.
+- Breaking it down would only produce literal or trivial steps.
+
+Planning steps are called "steps" in the tool, but really they're more like tasks or TODOs. As such they should be very concise descriptions of non-obvious work that an engineer might do like "Write the API spec", then "Update the backend", then "Implement the frontend". On the other hand, it's obvious that you'll usually have to "Explore the codebase" or "Implement the changes", so those are not worth tracking in your plan.
+
+It may be the case that you complete all steps in your plan after a single pass of implementation. If this is the case, you can simply mark all the planned steps as completed. The content of your plan should not involve doing anything that you aren't capable of doing (i.e. don't try to test things that you can't test). Do not use plans for simple or single-step queries that you can just do or answer immediately.
+
+### Examples
+
+**High-quality plans**
+
+Example 1:
+
+1. Add CLI entry with file args
+2. Parse Markdown via CommonMark library
+3. Apply semantic HTML template
+4. Handle code blocks, images, links
+5. Add error handling for invalid files
+
+Example 2:
+
+1. Define CSS variables for colors
+2. Add toggle with localStorage state
+3. Refactor components to use variables
+4. Verify all views for readability
+5. Add smooth theme-change transition
+
+Example 3:
+
+1. Set up Node.js + WebSocket server
+2. Add join/leave broadcast events
+3. Implement messaging with timestamps
+4. Add usernames + mention highlighting
+5. Persist messages in lightweight DB
+6. Add typing indicators + unread count
+
+**Low-quality plans**
+
+Example 1:
+
+1. Create CLI tool
+2. Add Markdown parser
+3. Convert to HTML
+
+Example 2:
+
+1. Add dark mode toggle
+2. Save preference
+3. Make styles look good
+
+Example 3:
+
+1. Create single-file HTML game
+2. Run quick sanity check
+3. Summarize usage instructions
+
+If you need to write a plan, only write high quality plans, not low quality ones.
+
+## Task execution
+
+You are a coding agent. Please keep going until the query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved. Autonomously resolve the query to the best of your ability, using the tools available to you, before coming back to the user. Do NOT guess or make up an answer.
+
+You MUST adhere to the following criteria when solving queries:
+- Working on the repo(s) in the current environment is allowed, even if they are proprietary.
+- Analyzing code for vulnerabilities is allowed.
+- Showing user code and tool call details is allowed.
+- Use the `apply_patch` tool to edit files (NEVER try `applypatch` or `apply-patch`, only `apply_patch`): {"command":["apply_patch","*** Begin Patch\\n*** Update File: path/to/file.py\\n@@ def example():\\n-  pass\\n+  return 123\\n*** End Patch"]}
+
+If completing the user's task requires writing or modifying files, your code and final answer should follow these coding guidelines, though user instructions (i.e. AGENTS.md) may override these guidelines:
+
+- Fix the problem at the root cause rather than applying surface-level patches, when possible.
+- Avoid unneeded complexity in your solution.
+- Do not attempt to fix unrelated bugs or broken tests. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
+- Update documentation as necessary.
+- Keep changes consistent with the style of the existing codebase. Changes should be minimal and focused on the task.
+- Use `git log` and `git blame` to search the history of the codebase if additional context is required.
+- NEVER add copyright or license headers unless specifically requested.
+- Do not waste tokens by re-reading files after calling `apply_patch` on them. The tool call will fail if it didn't work. The same goes for making folders, deleting folders, etc.
+- Do not `git commit` your changes or create new git branches unless explicitly requested.
+- Do not add inline comments within code unless explicitly requested.
+- Do not use one-letter variable names unless explicitly requested.
+- NEVER output inline citations like "【F:README.md†L5-L14】" in your outputs. The CLI is not able to render these so they will just be broken in the UI. Instead, if you output valid filepaths, users will be able to click on them to open the files in their editor.
+
+## Testing your work
+
+If the codebase has tests or the ability to build or run, you should use them to verify that your work is complete. Generally, your testing philosophy should be to start as specific as possible to the code you changed so that you can catch issues efficiently, then make your way to broader tests as you build confidence. If there's no test for the code you changed, and if the adjacent patterns in the codebases show that there's a logical place for you to add a test, you may do so. However, do not add tests to codebases with no tests, or where the patterns don't indicate so.
+
+Once you're confident in correctness, use formatting commands to ensure that your code is well formatted. These commands can take time so you should run them on as precise a target as possible. If there are issues you can iterate up to 3 times to get formatting right, but if you still can't manage it's better to save the user time and present them a correct solution where you call out the formatting in your final message. If the codebase does not have a formatter configured, do not add one.
+
+For all of testing, running, building, and formatting, do not attempt to fix unrelated bugs. It is not your responsibility to fix them. (You may mention them to the user in your final message though.)
+
+## Sandbox and approvals
+
+The Codex CLI harness supports several different sandboxing and approval configurations that the user can choose from.
+
+Filesystem sandboxing prevents you from editing files without user approval. The options are:
+- *read-only*: You can only read files.
+- *workspace-write*: You can read files. You can write to files in your workspace folder, but not outside it.
+- *danger-full-access*: No filesystem sandboxing.
+
+Network sandboxing prevents you from accessing the network without approval. The options are:
+- *ON*
+- *OFF*
+
+Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task. Approval options are
+- *untrusted*: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
+- *on-failure*: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
+- *on-request*: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
+- *never*: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
+
+When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
+- You need to run a command that writes to a directory that requires it (e.g. running tests that write to /tmp)
+- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
+- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
+- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval.
+- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
+- (For all of these, you should weigh alternative paths that do not require approval.)
+
+Note that when sandboxing is set to read-only, you'll need to request approval for any command that isn't a read.
+
+You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing ON, and approval on-failure.
+
+## Ambition vs. precision
+
+For tasks that have no prior context (i.e. the user is starting something brand new), you should feel free to be ambitious and demonstrate creativity with your implementation.
+
+If you're operating in an existing codebase, you should make sure you do exactly what the user asks with surgical precision. Treat the surrounding codebase with respect, and don't overstep (i.e. changing filenames or variables unnecessarily). You should balance being sufficiently ambitious and proactive when completing tasks of this nature.
+
+You should use judicious initiative to decide on the right level of detail and complexity to deliver based on the user's needs. This means showing good judgment that you're capable of doing the right extras without gold-plating. This might be demonstrated by high-value, creative touches when scope of the task is vague; while being surgical and targeted when scope is tightly specified.
+
+## Sharing progress updates
+
+For especially longer tasks that you work on (i.e. requiring many tool calls, or a plan with multiple steps), you should provide progress updates back to the user at reasonable intervals. These updates should be structured as a concise sentence or two (no more than 8-10 words long) recapping progress so far in plain language: this update demonstrates your understanding of what needs to be done, progress so far (i.e. files explored, subtasks complete), and where you're going next.
+
+Before doing large chunks of work that may incur latency as experienced by the user (i.e. writing a new file), you should send a concise message to the user with an update indicating what you're about to do to ensure they know what you're spending time on. Don't start editing or writing large files before informing the user what you are doing and why.
+
+The messages you send before tool calls should describe what is immediately about to be done next in very concise language. If there was previous work done, this preamble message should also include a note about the work done so far to bring the user along.
+
+## Presenting your work and final message
+
+Your final message should read naturally, like an update from a concise teammate. For casual conversation, brainstorming tasks, or quick questions from the user, respond in a friendly, conversational tone. You should ask questions, suggest ideas, and adapt to the user’s style. If you've finished a large amount of work, when describing what you've done to the user, you should follow the final answer formatting guidelines to communicate substantive changes. You don't need to add structured formatting for one-word answers, greetings, or purely conversational exchanges.
+
+You can skip heavy formatting for single, simple actions or confirmations. In these cases, respond in plain sentences with any relevant next step or quick option. Reserve multi-section structured responses for results that need grouping or explanation.
+
+The user is working on the same computer as you, and has access to your work. As such there's no need to show the full contents of large files you have already written unless the user explicitly asks for them. Similarly, if you've created or modified files using `apply_patch`, there's no need to tell users to "save the file" or "copy the code into a file"—just reference the file path.
+
+If there's something that you think you could help with as a logical next step, concisely ask the user if they want you to do so. Good examples of this are running tests, committing changes, or building out the next logical component. If there’s something that you couldn't do (even with approval) but that the user might want to do (such as verifying changes by running the app), include those instructions succinctly.
+
+Brevity is very important as a default. You should be very concise (i.e. no more than 10 lines), but can relax this requirement for tasks where additional detail and comprehensiveness is important for the user's understanding.
+
+### Final answer structure and style guidelines
+
+You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
+
+**Section Headers**
+- Use only when they improve clarity — they are not mandatory for every answer.
+- Choose descriptive names that fit the content
+- Keep headers short (1–3 words) and in `**Title Case**`. Always start headers with `**` and end with `**`
+- Leave no blank line before the first bullet under a header.
+- Section headers should only be used where they genuinely improve scanability; avoid fragmenting the answer.
+
+**Bullets**
+- Use `-` followed by a space for every bullet.
+- Bold the keyword, then colon + concise description.
+- Merge related points when possible; avoid a bullet for every trivial detail.
+- Keep bullets to one line unless breaking for clarity is unavoidable.
+- Group into short lists (4–6 bullets) ordered by importance.
+- Use consistent keyword phrasing and formatting across sections.
+
+**Monospace**
+- Wrap all commands, file paths, env vars, and code identifiers in backticks (`` `...` ``).
+- Apply to inline examples and to bullet keywords if the keyword itself is a literal file/command.
+- Never mix monospace and bold markers; choose one based on whether it’s a keyword (`**`) or inline code/path (`` ` ``).
+
+**Structure**
+- Place related bullets together; don’t mix unrelated concepts in the same section.
+- Order sections from general → specific → supporting info.
+- For subsections (e.g., “Binaries” under “Rust Workspace”), introduce with a bolded keyword bullet, then list items under it.
+- Match structure to complexity:
+  - Multi-part or detailed results → use clear headers and grouped bullets.
+  - Simple results → minimal headers, possibly just a short list or paragraph.
+
+**Tone**
+- Keep the voice collaborative and natural, like a coding partner handing off work.
+- Be concise and factual — no filler or conversational commentary and avoid unnecessary repetition
+- Use present tense and active voice (e.g., “Runs tests” not “This will run tests”).
+- Keep descriptions self-contained; don’t refer to “above” or “below”.
+- Use parallel structure in lists for consistency.
+
+**Don’t**
+- Don’t use literal words “bold” or “monospace” in the content.
+- Don’t nest bullets or create deep hierarchies.
+- Don’t output ANSI escape codes directly — the CLI renderer applies them.
+- Don’t cram unrelated keywords into a single bullet; split for clarity.
+- Don’t let keyword lists run long — wrap or reformat for scanability.
+
+Generally, ensure your final answers adapt their shape and depth to the request. For example, answers to code explanations should have a precise, structured explanation with code references that answer the question directly. For tasks with a simple implementation, lead with the outcome and supplement only with what’s needed for clarity. Larger changes can be presented as a logical walkthrough of your approach, grouping related steps, explaining rationale where it adds value, and highlighting next actions to accelerate the user. Your answers should provide the right level of detail while being easily scannable.
+
+For casual greetings, acknowledgements, or other one-off conversational messages that are not delivering substantive information or structured results, respond naturally without section headers or bullet formatting.
+
+# Tools
+
+## `apply_patch`
+
+Your patch language is a stripped‑down, file‑oriented diff format designed to be easy to parse and safe to apply. You can think of it as a high‑level envelope:
+
+*** Begin Patch
+[ one or more file sections ]
+*** End Patch
+
+Within that envelope, you get a sequence of file operations.
+You MUST include a header to specify the action you are taking.
+Each operation starts with one of three headers:
+
+*** Add File: <path> - create a new file. Every following line is a + line (the initial contents).
+*** Delete File: <path> - remove an existing file. Nothing follows.
+*** Update File: <path> - patch an existing file in place (optionally with a rename).
+
+May be immediately followed by \*\*\* Move to: <new path> if you want to rename the file.
+Then one or more “hunks”, each introduced by @@ (optionally followed by a hunk header).
+Within a hunk each line starts with:
+
+- `+` for inserted text,
+- `-` for removed text, or
+- space (` `) for context.
+
+At the end of a truncated hunk you can emit `*** End of File`.
+
+Patch := Begin { FileOp } End
+Begin := "*** Begin Patch" NEWLINE
+End := "*** End Patch" NEWLINE
+FileOp := AddFile | DeleteFile | UpdateFile
+AddFile := "*** Add File: " path NEWLINE { "+" line NEWLINE }
+DeleteFile := "*** Delete File: " path NEWLINE
+UpdateFile := "*** Update File: " path NEWLINE [ MoveTo ] { Hunk }
+MoveTo := "*** Move to: " newPath NEWLINE
+Hunk := "@@" [ header ] NEWLINE { HunkLine } [ "*** End of File" NEWLINE ]
+HunkLine := (" " | "-" | "+") text NEWLINE
+
+A full patch can combine several operations:
+
+*** Begin Patch
+*** Add File: hello.txt
++Hello world
+*** Update File: src/app.py
+*** Move to: src/main.py
+@@ def greet():
+-print("Hi")
++print("Hello, world!")
+*** Delete File: obsolete.txt
+*** End Patch
+
+It is important to remember:
+
+- You must include a header with your intended action (Add/Delete/Update)
+- You must prefix new lines with `+` even when creating a new file
+
+You can invoke apply_patch like:
+
+```
+shell {"command":["apply_patch","*** Begin Patch\n*** Add File: hello.txt\n+Hello, world!\n*** End Patch\n"]}
+```
+
+## `update_plan`
+
+A tool named `update_plan` is available to you. You can use it to keep an up‑to‑date, step‑by‑step plan for the task.
+
+To create a new plan, call `update_plan` with a short list of 1‑sentence steps (no more than 5-7 words each) with a `status` for each step (`pending`, `in_progress`, or `completed`).
+
+When steps have been completed, use `update_plan` to mark each finished step as `completed` and the next step you are working on as `in_progress`. There should always be exactly one `in_progress` step until everything is done. You can mark multiple items as complete in a single `update_plan` call.
+
+If all steps are complete, ensure you call `update_plan` to mark all steps as `completed`.
diff --git a/prompt_gpt5_codex.md b/prompt_gpt5_codex.md
new file mode 100644
index 0000000000000000000000000000000000000000..2c49fafec62ab29566fe38e5cd05fcf8aa0c9bce
--- /dev/null
+++ b/prompt_gpt5_codex.md
@@ -0,0 +1,100 @@
+You are Codex, based on GPT-5. You are running as a coding agent in the Codex CLI on a user's computer.
+
+## General
+
+- The arguments to `shell` will be passed to execvp(). Most terminal commands should be prefixed with ["bash", "-lc"].
+- Always set the `workdir` param when using the shell function. Do not use `cd` unless absolutely necessary.
+- When searching for text or files, prefer using `rg` or `rg --files` respectively because `rg` is much faster than alternatives like `grep`. (If the `rg` command is not found, then use alternatives.)
+
+## Editing constraints
+
+- Default to ASCII when editing or creating files. Only introduce non-ASCII or other Unicode characters when there is a clear justification and the file already uses them.
+- Add succinct code comments that explain what is going on if code is not self-explanatory. You should not add comments like "Assigns the value to the variable", but a brief comment might be useful ahead of a complex code block that the user would otherwise have to spend time parsing out. Usage of these comments should be rare.
+- You may be in a dirty git worktree.
+    * NEVER revert existing changes you did not make unless explicitly requested, since these changes were made by the user.
+    * If asked to make a commit or code edits and there are unrelated changes to your work or changes that you didn't make in those files, don't revert those changes.
+    * If the changes are in files you've touched recently, you should read carefully and understand how you can work with the changes rather than reverting them.
+    * If the changes are in unrelated files, just ignore them and don't revert them.
+- While you are working, you might notice unexpected changes that you didn't make. If this happens, STOP IMMEDIATELY and ask the user how they would like to proceed.
+
+## Plan tool
+
+When using the planning tool:
+- Skip using the planning tool for straightforward tasks (roughly the easiest 25%).
+- Do not make single-step plans.
+- After you have made a plan, update it each time you complete one of the sub-tasks that you shared in the plan.
+
+## Codex CLI harness, sandboxing, and approvals
+
+The Codex CLI harness supports several different sandboxing and approval configurations that the user can choose from.
+
+Filesystem sandboxing defines which files can be read or written. The options are:
+- **read-only**: You can only read files.
+- **workspace-write**: You can read files. You can write to files in this folder, but not outside it.
+- **danger-full-access**: No filesystem sandboxing.
+
+Network sandboxing defines whether the network can be accessed without approval. The options are:
+- **restricted**: Requires approval
+- **enabled**: No approval needed
+
+Approvals are your mechanism to get user consent to perform more privileged actions. Although they introduce friction to the user because your work is paused until the user responds, you should leverage them to accomplish your important work. Do not let these settings or the sandbox deter you from attempting to accomplish the user's task unless it is set to "never", in which case never ask for approvals.
+
+The approval options are:
+- **untrusted**: The harness will escalate most commands for user approval, apart from a limited allowlist of safe "read" commands.
+- **on-failure**: The harness will allow all commands to run in the sandbox (if enabled), and failures will be escalated to the user for approval to run again without the sandbox.
+- **on-request**: Commands will be run in the sandbox by default, and you can specify in your tool call if you want to escalate a command to run without sandboxing. (Note that this mode is not always available. If it is, you'll see parameters for it in the `shell` command description.)
+- **never**: This is a non-interactive mode where you may NEVER ask the user for approval to run commands. Instead, you must always persist and work around constraints to solve the task for the user. You MUST do your utmost best to finish the task and validate your work before yielding. If this mode is paired with `danger-full-access`, take advantage of it to deliver the best outcome for the user. Further, in this mode, your default testing philosophy is overridden: Even if you don't see local patterns for testing, you may add tests and scripts to validate your work. Just remove them before yielding.
+
+When you are running with approvals `on-request`, and sandboxing enabled, here are scenarios where you'll need to request approval:
+- You need to run a command that writes to a directory that requires approval (e.g. running tests that write to /tmp)
+- You need to run a GUI app (e.g., open/xdg-open/osascript) to open browsers or files.
+- You are running sandboxed and need to run a command that requires network access (e.g. installing packages)
+- If you run a command that is important to solving the user's query, but it fails because of sandboxing, rerun the command with approval.
+- You are about to take a potentially destructive action such as an `rm` or `git reset` that the user did not explicitly ask for
+- (for all of these, you should weigh alternative paths that do not require approval)
+
+When sandboxing is set to read-only, you'll need to request approval for any command that isn't a read.
+
+You will be told what filesystem sandboxing, network sandboxing, and approval mode are active in a developer or user message. If you are not told about this, assume that you are running with workspace-write, network sandboxing enabled, and approval on-failure.
+
+## Special user requests
+
+- If the user makes a simple request (such as asking for the time) which you can fulfill by running a terminal command (such as `date`), you should do so.
+- If the user asks for a "review", default to a code review mindset: prioritise identifying bugs, risks, behavioural regressions, and missing tests. Findings must be the primary focus of the response - keep summaries or overviews brief and only after enumerating the issues. Present findings first (ordered by severity with file/line references), follow with open questions or assumptions, and offer a change-summary only as a secondary detail. If no findings are discovered, state that explicitly and mention any residual risks or testing gaps.
+
+## Presenting your work and final message
+
+You are producing plain text that will later be styled by the CLI. Follow these rules exactly. Formatting should make results easy to scan, but not feel mechanical. Use judgment to decide how much structure adds value.
+
+- Default: be very concise; friendly coding teammate tone.
+- Ask only when needed; suggest ideas; mirror the user's style.
+- For substantial work, summarize clearly; follow final‑answer formatting.
+- Skip heavy formatting for simple confirmations.
+- Don't dump large files you've written; reference paths only.
+- No "save/copy this file" - User is on the same machine.
+- Offer logical next steps (tests, commits, build) briefly; add verify steps if you couldn't do something.
+- For code changes:
+  * Lead with a quick explanation of the change, and then give more details on the context covering where and why a change was made. Do not start this explanation with "summary", just jump right in.
+  * If there are natural next steps the user may want to take, suggest them at the end of your response. Do not make suggestions if there are no natural next steps.
+  * When suggesting multiple options, use numeric lists for the suggestions so the user can quickly respond with a single number.
+- The user does not see command execution outputs. When asked to show the output of a command (e.g. `git show`), relay the important details in your answer or summarize the key lines so the user understands the result.
+
+### Final answer structure and style guidelines
+
+- Plain text; CLI handles styling. Use structure only when it helps scanability.
+- Headers: optional; short Title Case (1-3 words) wrapped in **…**; no blank line before the first bullet; add only if they truly help.
+- Bullets: use - ; merge related points; keep to one line when possible; 4–6 per list ordered by importance; keep phrasing consistent.
+- Monospace: backticks for commands/paths/env vars/code ids and inline examples; use for literal keyword bullets; never combine with **.
+- Code samples or multi-line snippets should be wrapped in fenced code blocks; add a language hint whenever obvious.
+- Structure: group related bullets; order sections general → specific → supporting; for subsections, start with a bolded keyword bullet, then items; match complexity to the task.
+- Tone: collaborative, concise, factual; present tense, active voice; self‑contained; no "above/below"; parallel wording.
+- Don'ts: no nested bullets/hierarchies; no ANSI codes; don't cram unrelated keywords; keep keyword lists short—wrap/reformat if long; avoid naming formatting styles in answers.
+- Adaptation: code explanations → precise, structured with code refs; simple tasks → lead with outcome; big changes → logical walkthrough + rationale + next actions; casual one-offs → plain sentences, no headers/bullets.
+- File References: When referencing files in your response, make sure to include the relevant start line and always follow the below rules:
+  * Use inline code to make file paths clickable.
+  * Each reference should have a stand alone path. Even if it's the same file.
+  * Accepted: absolute, workspace‑relative, a/ or b/ diff prefixes, or bare filename/suffix.
+  * Line/column (1‑based, optional): :line[:column] or #Lline[Ccolumn] (column defaults to 1).
+  * Do not use URIs like file://, vscode://, or https://.
+  * Do not provide a range of lines.
+  * Examples: src/app.ts, src/app.ts:42, b/server/index.js#L10, C:\repo\project\main.rs:12:5
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..747e3d11a4ff219a6fad9a1e38a037a79a758692
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,42 @@
+[build-system]
+requires = ["setuptools>=61"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "chatmock"
+dynamic = ["version"]
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "blinker==1.9.0",
+    "certifi==2025.8.3",
+    "flask==3.1.1",
+    "flask-sock==0.7.0",
+    "idna==3.10",
+    "itsdangerous==2.2.0",
+    "jinja2==3.1.6",
+    "markupsafe==3.0.2",
+    "requests==2.32.5",
+    "urllib3==2.5.0",
+    "websockets==15.0.1",
+    "werkzeug==3.1.3",
+]
+
+[project.optional-dependencies]
+gui = [
+    "Pillow==11.3.0",
+    "PyInstaller==6.16.0",
+    "PySide6==6.9.2",
+]
+
+[project.scripts]
+chatmock = "chatmock.cli:main"
+
+[tool.setuptools.packages.find]
+include = ["chatmock*"]
+
+[tool.setuptools.package-data]
+chatmock = ["prompt.md", "prompt_gpt5_codex.md"]
+
+[tool.setuptools.dynamic]
+version = {attr = "chatmock.version.__version__"}
diff --git a/test_request.json b/test_request.json
new file mode 100644
index 0000000000000000000000000000000000000000..0ff8889472d1bab441179084e9bddaf32b9d28e0
--- /dev/null
+++ b/test_request.json
@@ -0,0 +1,4 @@
+{
+  "model": "gpt-5.4",
+  "messages": [{"role": "user", "content": "What is 2+2? Answer in one word."}]
+}
diff --git a/tests/__pycache__/test_models.cpython-314.pyc b/tests/__pycache__/test_models.cpython-314.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9f9bf6386a38d72ebd36093e3be0e598635fc02a
Binary files /dev/null and b/tests/__pycache__/test_models.cpython-314.pyc differ
diff --git a/tests/test_fast_mode.py b/tests/test_fast_mode.py
new file mode 100644
index 0000000000000000000000000000000000000000..6892ec52484cb6c2412e7dca007d367a2646bfba
--- /dev/null
+++ b/tests/test_fast_mode.py
@@ -0,0 +1,49 @@
+from __future__ import annotations
+
+import unittest
+
+from chatmock.fast_mode import parse_optional_bool, resolve_service_tier, supports_priority_service_tier
+
+
+class FastModeTests(unittest.TestCase):  # Unit tests for the helpers imported from chatmock.fast_mode.
+    def test_parse_optional_bool(self) -> None:  # tri-state parsing: True / False / None
+        self.assertTrue(parse_optional_bool(True))
+        self.assertTrue(parse_optional_bool("true"))
+        self.assertFalse(parse_optional_bool(False))
+        self.assertFalse(parse_optional_bool("off"))
+        self.assertIsNone(parse_optional_bool("maybe"))  # unrecognized text yields None instead of raising
+
+    def test_priority_allowlist_uses_normalized_model_ids(self) -> None:
+        self.assertTrue(supports_priority_service_tier("gpt5.4"))  # hyphen-less alias is expected to be accepted
+        self.assertFalse(supports_priority_service_tier("gpt-5.3-codex"))
+
+    def test_explicit_fast_mode_true_errors_for_unsupported_model(self) -> None:  # per-request opt-in must fail loudly
+        resolution = resolve_service_tier(
+            "gpt-5.3-codex",
+            request_fast_mode=True,
+            server_fast_mode=False,
+        )
+        self.assertIsNone(resolution.service_tier)
+        self.assertIsNotNone(resolution.error_message)
+
+    def test_server_default_fast_mode_falls_back_on_unsupported_model(self) -> None:  # server default only warns
+        resolution = resolve_service_tier(
+            "gpt-5.3-codex",
+            server_fast_mode=True,
+        )
+        self.assertIsNone(resolution.service_tier)
+        self.assertIsNone(resolution.error_message)
+        self.assertIsNotNone(resolution.warning_message)
+
+    def test_request_fast_mode_false_overrides_server_default(self) -> None:  # request-level False wins over server True
+        resolution = resolve_service_tier(
+            "gpt-5.4",
+            request_fast_mode=False,
+            server_fast_mode=True,
+        )
+        self.assertIsNone(resolution.service_tier)
+        self.assertIsNone(resolution.error_message)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000000000000000000000000000000000000..022cdd2ae8b54c27038a648ad8ca4ed2578c1c82
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,46 @@
+from __future__ import annotations
+
+import unittest
+
+from chatmock.model_registry import allowed_efforts_for_model, list_public_models, normalize_model_name
+
+
+class ModelRegistryTests(unittest.TestCase):  # Unit tests for the helpers imported from chatmock.model_registry.
+    def test_normalizes_aliases(self) -> None:  # hyphen-less "gpt5*" aliases map to canonical "gpt-5*" ids
+        self.assertEqual(normalize_model_name("gpt5"), "gpt-5")
+        self.assertEqual(normalize_model_name("gpt5.4"), "gpt-5.4")
+        self.assertEqual(normalize_model_name("gpt5.4-mini"), "gpt-5.4-mini")
+        self.assertEqual(normalize_model_name("gpt5.5"), "gpt-5.5")
+        self.assertEqual(normalize_model_name("gpt5.3-codex-spark"), "gpt-5.3-codex-spark")
+        self.assertEqual(normalize_model_name("codex"), "codex-mini-latest")  # bare "codex" maps to codex-mini-latest
+
+    def test_strips_reasoning_suffixes(self) -> None:  # also pins inputs that must pass through unchanged
+        self.assertEqual(normalize_model_name("gpt-5.5-high"), "gpt-5.5")
+        self.assertEqual(normalize_model_name("gpt-5.4-high"), "gpt-5.4")
+        self.assertEqual(normalize_model_name("gpt-5.4-mini-high"), "gpt-5.4-mini")
+        self.assertEqual(normalize_model_name("gpt-5.2_codemirror"), "gpt-5.2_codemirror")  # "_codemirror" is left intact
+        self.assertEqual(normalize_model_name("gpt-5.1-codex:max"), "gpt-5.1-codex:max")  # ":max" is left intact
+        self.assertEqual(normalize_model_name("gpt-5.1-codex:high"), "gpt-5.1-codex")  # ":high" is stripped like "-high"
+
+    def test_allowed_efforts_follow_registry(self) -> None:  # per-model effort sets; only some models allow "none"/"xhigh"
+        self.assertEqual(allowed_efforts_for_model("gpt-5.5"), frozenset(("none", "low", "medium", "high", "xhigh")))
+        self.assertEqual(allowed_efforts_for_model("gpt-5.4"), frozenset(("none", "low", "medium", "high", "xhigh")))
+        self.assertEqual(allowed_efforts_for_model("gpt-5.4-mini"), frozenset(("low", "medium", "high", "xhigh")))
+        self.assertEqual(allowed_efforts_for_model("gpt-5.1-codex"), frozenset(("low", "medium", "high")))
+
+    def test_public_models_include_variants(self) -> None:  # with expose_reasoning_models=True, "<model>-<effort>" ids are listed
+        model_ids = list_public_models(expose_reasoning_models=True)
+        self.assertIn("gpt-5.5", model_ids)
+        self.assertIn("gpt-5.4", model_ids)
+        self.assertIn("gpt-5.4-mini", model_ids)
+        self.assertIn("gpt-5.3-codex-spark", model_ids)
+        self.assertIn("gpt-5.5-none", model_ids)
+        self.assertIn("gpt-5.4-none", model_ids)
+        self.assertIn("gpt-5.4-mini-xhigh", model_ids)
+        self.assertNotIn("gpt-5.4-mini-none", model_ids)  # consistent with gpt-5.4-mini lacking the "none" effort above
+        self.assertIn("gpt-5.1-codex-max-xhigh", model_ids)
+        self.assertNotIn("codex-mini-high", model_ids)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_routes.py b/tests/test_routes.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5d94bc77fe2ce56a1f5234c9c90b2650acf2bc3
--- /dev/null
+++ b/tests/test_routes.py
@@ -0,0 +1,658 @@
+from __future__ import annotations
+
+import json
+import socket
+import threading
+import time
+import unittest
+from unittest.mock import patch
+
+from chatmock.app import create_app
+from chatmock.session import reset_session_state
+from websockets.sync.client import connect as ws_connect
+
+
+class FakeUpstream:  # Test double for the upstream HTTP response; presumably mirrors requests.Response - confirm against callers.
+    def __init__(
+        self,
+        events: list[dict[str, object]] | None = None,  # event dicts replayed by iter_lines() / iter_content()
+        *,
+        status_code: int = 200,
+        headers: dict[str, str] | None = None,
+        content: bytes | None = None,  # raw body; when non-empty it takes precedence in iter_content()
+        text: str = "",
+    ) -> None:
+        self._events = events
+        self.status_code = status_code
+        self.headers = headers or {}  # None (or empty) becomes a fresh empty dict
+        self.content = content or b""  # None becomes empty bytes
+        self.text = text
+
+    def iter_lines(self, decode_unicode: bool = False):  # yields one "data: <json>" line per event
+        for event in self._events or []:
+            payload = f"data: {json.dumps(event)}"
+            yield payload if decode_unicode else payload.encode("utf-8")  # str when decode_unicode, else UTF-8 bytes
+
+    def iter_content(self, chunk_size=None):  # chunk_size is accepted but never used
+        if self.content:
+            yield self.content  # a non-empty raw body is emitted as a single chunk
+            return
+        for event in self._events or []:
+            payload = f"data: {json.dumps(event)}\n\n".encode("utf-8")  # one event per chunk, terminated by a blank line
+            yield payload
+
+    def json(self):  # decodes self.content as UTF-8 JSON; json.loads raises if the body is empty or invalid
+        return json.loads(self.content.decode("utf-8"))
+
+    def close(self) -> None:  # no-op: the fake holds no resources
+        return None
+
+
+class RouteTests(unittest.TestCase):
+    def setUp(self) -> None:
+        reset_session_state()
+        self.app = create_app()
+        self.client = self.app.test_client()
+
+    def test_openai_models_list(self) -> None:
+        response = self.client.get("/v1/models")
+        body = response.get_json()
+        self.assertEqual(response.status_code, 200)
+        model_ids = [item["id"] for item in body["data"]]
+        self.assertIn("gpt-5.4", model_ids)
+        self.assertIn("gpt-5.4-mini", model_ids)
+        self.assertIn("gpt-5.3-codex-spark", model_ids)
+
+    def test_ollama_tags_list(self) -> None:
+        response = self.client.get("/api/tags")
+        body = response.get_json()
+        self.assertEqual(response.status_code, 200)
+        model_names = [item["name"] for item in body["models"]]
+        self.assertIn("gpt-5.4", model_names)
+        self.assertIn("gpt-5.4-mini", model_names)
+
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions(self, mock_start) -> None:
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {"type": "response.output_text.delta", "delta": "hello"},
+                    {"type": "response.completed", "response": {"id": "resp-openai"}},
+                ]
+            ),
+            None,
+        )
+        response = self.client.post(
+            "/v1/chat/completions",
+            json={"model": "gpt5.4-mini", "messages": [{"role": "user", "content": "hi"}]},
+        )
+        body = response.get_json()
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(body["choices"][0]["message"]["content"], "hello")
+        self.assertEqual(body["model"], "gpt5.4-mini")
+
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_honors_debug_model_override(self, mock_start) -> None:
+        app = create_app(debug_model="gpt-5.4")
+        client = app.test_client()
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {"type": "response.output_text.delta", "delta": "hello"},
+                    {"type": "response.completed", "response": {"id": "resp-openai"}},
+                ]
+            ),
+            None,
+        )
+        response = client.post(
+            "/v1/chat/completions",
+            json={"model": "gpt-5.3-codex", "messages": [{"role": "user", "content": "hi"}]},
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(mock_start.call_args.args[0], "gpt-5.4")
+
+    @patch("chatmock.routes_ollama.start_upstream_request")
+    def test_ollama_chat(self, mock_start) -> None:
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {"type": "response.output_text.delta", "delta": "hello"},
+                    {"type": "response.completed"},
+                ]
+            ),
+            None,
+        )
+        response = self.client.post(
+            "/api/chat",
+            json={"model": "gpt-5.4", "messages": [{"role": "user", "content": "hi"}], "stream": False},
+        )
+        body = response.get_json()
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(body["message"]["content"], "hello")
+        self.assertEqual(body["model"], "gpt-5.4")
+
+    @patch("chatmock.routes_ollama.start_upstream_request")
+    def test_ollama_chat_honors_debug_model_override(self, mock_start) -> None:
+        app = create_app(debug_model="gpt-5.4")
+        client = app.test_client()
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {"type": "response.output_text.delta", "delta": "hello"},
+                    {"type": "response.completed"},
+                ]
+            ),
+            None,
+        )
+        response = client.post(
+            "/api/chat",
+            json={"model": "gpt-5.3-codex", "messages": [{"role": "user", "content": "hi"}], "stream": False},
+        )
+        body = response.get_json()
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(mock_start.call_args.args[0], "gpt-5.4")
+        self.assertEqual(body["model"], "gpt-5.4")
+
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_fast_mode_sets_priority_service_tier(self, mock_start) -> None:
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {"type": "response.output_text.delta", "delta": "hello"},
+                    {"type": "response.completed", "response": {"id": "resp-openai"}},
+                ]
+            ),
+            None,
+        )
+        response = self.client.post(
+            "/v1/chat/completions",
+            json={
+                "model": "gpt-5.4",
+                "fast_mode": True,
+                "messages": [{"role": "user", "content": "hi"}],
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(mock_start.call_args.kwargs["service_tier"], "priority")
+
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_fast_mode_false_overrides_server_default(self, mock_start) -> None:
+        app = create_app(fast_mode=True)
+        client = app.test_client()
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {"type": "response.output_text.delta", "delta": "hello"},
+                    {"type": "response.completed", "response": {"id": "resp-openai"}},
+                ]
+            ),
+            None,
+        )
+        response = client.post(
+            "/v1/chat/completions",
+            json={
+                "model": "gpt-5.4",
+                "fast_mode": False,
+                "messages": [{"role": "user", "content": "hi"}],
+            },
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertIsNone(mock_start.call_args.kwargs["service_tier"])
+
+    @patch("chatmock.routes_openai.start_upstream_request")
+    def test_chat_completions_rejects_unsupported_explicit_fast_mode(self, mock_start) -> None:
+        response = self.client.post(
+            "/v1/chat/completions",
+            json={
+                "model": "gpt-5.3-codex",
+                "fast_mode": True,
+                "messages": [{"role": "user", "content": "hi"}],
+            },
+        )
+        body = response.get_json()
+        self.assertEqual(response.status_code, 400)
+        self.assertIn("Fast mode is not supported", body["error"]["message"])
+        mock_start.assert_not_called()
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_returns_completed_response_object(self, mock_start) -> None:
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {
+                        "type": "response.created",
+                        "response": {"id": "resp_123", "object": "response", "status": "in_progress"},
+                    },
+                    {
+                        "type": "response.completed",
+                        "response": {
+                            "id": "resp_123",
+                            "object": "response",
+                            "status": "completed",
+                            "output": [],
+                        },
+                    },
+                ],
+                headers={"Content-Type": "text/event-stream"},
+            ),
+            None,
+        )
+        response = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt5.4-mini", "input": "hello"},
+        )
+        body = response.get_json()
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(body["id"], "resp_123")
+        outbound_payload = mock_start.call_args.args[0]
+        self.assertEqual(outbound_payload["model"], "gpt-5.4-mini")
+        self.assertEqual(outbound_payload["store"], False)
+        self.assertEqual(
+            outbound_payload["input"],
+            [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}],
+        )
+        self.assertEqual(outbound_payload["reasoning"]["effort"], "medium")
+        self.assertIsInstance(outbound_payload["prompt_cache_key"], str)
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_honors_debug_model_override(self, mock_start) -> None:
+        app = create_app(debug_model="gpt-5.4")
+        client = app.test_client()
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {
+                        "type": "response.created",
+                        "response": {"id": "resp_debug", "object": "response", "status": "in_progress"},
+                    },
+                    {
+                        "type": "response.completed",
+                        "response": {
+                            "id": "resp_debug",
+                            "object": "response",
+                            "status": "completed",
+                            "output": [],
+                        },
+                    },
+                ],
+                headers={"Content-Type": "text/event-stream"},
+            ),
+            None,
+        )
+        response = client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.3-codex", "input": "hello"},
+        )
+        self.assertEqual(response.status_code, 200)
+        outbound_payload = mock_start.call_args.args[0]
+        self.assertEqual(outbound_payload["model"], "gpt-5.4")
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_strips_unsupported_max_output_tokens(self, mock_start) -> None:
+        mock_start.return_value = (
+            FakeUpstream(
+                [
+                    {
+                        "type": "response.created",
+                        "response": {"id": "resp_limit", "object": "response", "status": "in_progress"},
+                    },
+                    {
+                        "type": "response.completed",
+                        "response": {
+                            "id": "resp_limit",
+                            "object": "response",
+                            "status": "completed",
+                            "output": [],
+                        },
+                    },
+                ],
+                headers={"Content-Type": "text/event-stream"},
+            ),
+            None,
+        )
+        response = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.4", "input": "hello", "max_output_tokens": 20},
+        )
+        self.assertEqual(response.status_code, 200)
+        outbound_payload = mock_start.call_args.args[0]
+        self.assertNotIn("max_output_tokens", outbound_payload)
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_does_not_use_previous_response_id_for_http_follow_up(self, mock_start) -> None:
+        mock_start.side_effect = [
+            (
+                FakeUpstream(
+                    [
+                        {
+                            "type": "response.created",
+                            "response": {"id": "resp_1", "object": "response", "status": "in_progress"},
+                        },
+                        {
+                            "type": "response.output_item.done",
+                            "item": {
+                                "type": "message",
+                                "role": "assistant",
+                                "id": "msg_1",
+                                "content": [{"type": "output_text", "text": "assistant output"}],
+                            },
+                        },
+                        {
+                            "type": "response.completed",
+                            "response": {"id": "resp_1", "object": "response", "status": "completed", "output": []},
+                        },
+                    ],
+                    headers={"Content-Type": "text/event-stream"},
+                ),
+                None,
+            ),
+            (
+                FakeUpstream(
+                    [
+                        {
+                            "type": "response.created",
+                            "response": {"id": "resp_2", "object": "response", "status": "in_progress"},
+                        },
+                        {
+                            "type": "response.completed",
+                            "response": {"id": "resp_2", "object": "response", "status": "completed", "output": []},
+                        },
+                    ],
+                    headers={"Content-Type": "text/event-stream"},
+                ),
+                None,
+            ),
+        ]
+
+        first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"})
+        second = self.client.post(
+            "/v1/responses",
+            json={
+                "model": "gpt-5.4",
+                "input": [
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                    {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]},
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+                ],
+            },
+        )
+
+        self.assertEqual(first.status_code, 200)
+        self.assertEqual(second.status_code, 200)
+        outbound_payload = mock_start.call_args_list[1].args[0]
+        self.assertNotIn("previous_response_id", outbound_payload)
+        self.assertEqual(
+            outbound_payload["input"],
+            [
+                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]},
+                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+            ],
+        )
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_falls_back_to_full_create_when_non_input_fields_change(self, mock_start) -> None:
+        mock_start.side_effect = [
+            (
+                FakeUpstream(
+                    [
+                        {
+                            "type": "response.created",
+                            "response": {"id": "resp_1", "object": "response", "status": "in_progress"},
+                        },
+                        {
+                            "type": "response.completed",
+                            "response": {"id": "resp_1", "object": "response", "status": "completed", "output": []},
+                        },
+                    ],
+                    headers={"Content-Type": "text/event-stream"},
+                ),
+                None,
+            ),
+            (
+                FakeUpstream(
+                    [
+                        {
+                            "type": "response.created",
+                            "response": {"id": "resp_2", "object": "response", "status": "in_progress"},
+                        },
+                        {
+                            "type": "response.completed",
+                            "response": {"id": "resp_2", "object": "response", "status": "completed", "output": []},
+                        },
+                    ],
+                    headers={"Content-Type": "text/event-stream"},
+                ),
+                None,
+            ),
+        ]
+
+        headers = {"X-Session-Id": "session-fixed"}
+        first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers)
+        second = self.client.post(
+            "/v1/responses",
+            json={
+                "model": "gpt-5.4",
+                "instructions": "changed",
+                "input": [
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+                ],
+            },
+            headers=headers,
+        )
+
+        self.assertEqual(first.status_code, 200)
+        self.assertEqual(second.status_code, 200)
+        outbound_payload = mock_start.call_args_list[1].args[0]
+        self.assertNotIn("previous_response_id", outbound_payload)
+        self.assertEqual(
+            outbound_payload["input"],
+            [
+                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+            ],
+        )
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_clears_reuse_state_after_error(self, mock_start) -> None:
+        mock_start.side_effect = [
+            (
+                FakeUpstream(
+                    [
+                        {"type": "response.created", "response": {"id": "resp_1"}},
+                        {"type": "response.completed", "response": {"id": "resp_1", "output": []}},
+                    ],
+                    headers={"Content-Type": "text/event-stream"},
+                ),
+                None,
+            ),
+            (
+                FakeUpstream(
+                    [
+                        {"type": "response.failed", "response": {"error": {"message": "boom"}}},
+                    ],
+                    headers={"Content-Type": "text/event-stream"},
+                ),
+                None,
+            ),
+            (
+                FakeUpstream(
+                    [
+                        {"type": "response.created", "response": {"id": "resp_3"}},
+                        {"type": "response.completed", "response": {"id": "resp_3", "output": []}},
+                    ],
+                    headers={"Content-Type": "text/event-stream"},
+                ),
+                None,
+            ),
+        ]
+
+        headers = {"X-Session-Id": "session-fixed"}
+        first = self.client.post("/v1/responses", json={"model": "gpt-5.4", "input": "hello"}, headers=headers)
+        second = self.client.post(
+            "/v1/responses",
+            json={
+                "model": "gpt-5.4",
+                "input": [
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+                ],
+            },
+            headers=headers,
+        )
+        third = self.client.post(
+            "/v1/responses",
+            json={
+                "model": "gpt-5.4",
+                "input": [
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+                    {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]},
+                ],
+            },
+            headers=headers,
+        )
+
+        self.assertEqual(first.status_code, 200)
+        self.assertEqual(second.status_code, 502)
+        self.assertEqual(third.status_code, 200)
+        outbound_payload = mock_start.call_args_list[2].args[0]
+        self.assertNotIn("previous_response_id", outbound_payload)
+        self.assertEqual(
+            outbound_payload["input"],
+            [
+                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+                {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "third"}]},
+            ],
+        )
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_stream_passthrough(self, mock_start) -> None:
+        chunk = b'data: {"type":"response.output_text.delta","delta":"hello"}\n\n'
+        mock_start.return_value = (
+            FakeUpstream(
+                headers={"Content-Type": "text/event-stream"},
+                content=chunk,
+            ),
+            None,
+        )
+        response = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.4", "input": "hello", "stream": True},
+        )
+        self.assertEqual(response.status_code, 200)
+        self.assertIn("response.output_text.delta", response.get_data(as_text=True))
+
+    @patch("chatmock.routes_openai.start_upstream_raw_request")
+    def test_responses_route_rejects_unsupported_explicit_priority(self, mock_start) -> None:
+        response = self.client.post(
+            "/v1/responses",
+            json={"model": "gpt-5.3-codex", "input": "hello", "service_tier": "priority"},
+        )
+        body = response.get_json()
+        self.assertEqual(response.status_code, 400)
+        self.assertIn("Fast mode is not supported", body["error"]["message"])
+        mock_start.assert_not_called()
+
+    @patch("chatmock.websocket_routes.get_effective_chatgpt_auth", return_value=("token", "acct"))
+    @patch("chatmock.websocket_routes.connect_upstream_websocket")
+    def test_responses_websocket_rewrites_response_create(self, mock_connect, _mock_auth) -> None:
+        class FakeUpstreamWebsocket:
+            def __init__(self) -> None:
+                self.sent: list[str] = []
+                self._messages = [
+                    json.dumps({"type": "response.created", "response": {"id": "resp_ws_1"}}),
+                    json.dumps({
+                        "type": "response.output_item.done",
+                        "item": {
+                            "type": "message",
+                            "role": "assistant",
+                            "id": "msg_1",
+                            "content": [{"type": "output_text", "text": "assistant output"}],
+                        },
+                    }),
+                    json.dumps({"type": "response.completed", "response": {"id": "resp_ws_1"}}),
+                    json.dumps({"type": "response.created", "response": {"id": "resp_ws_2"}}),
+                    json.dumps({"type": "response.completed", "response": {"id": "resp_ws_2"}}),
+                ]
+
+            def send(self, message: str) -> None:
+                self.sent.append(message)
+
+            def recv(self) -> str:
+                return self._messages.pop(0)
+
+            def close(self) -> None:
+                return None
+
+        fake_upstream = FakeUpstreamWebsocket()
+        mock_connect.return_value = fake_upstream
+
+        app = create_app()
+
+        sock = socket.socket()
+        sock.bind(("127.0.0.1", 0))
+        host, port = sock.getsockname()
+        sock.close()
+
+        server_thread = threading.Thread(
+            target=app.run,
+            kwargs={
+                "host": host,
+                "port": port,
+                "use_reloader": False,
+                "threaded": True,
+            },
+            daemon=True,
+        )
+        server_thread.start()
+        time.sleep(0.5)
+
+        with ws_connect(f"ws://{host}:{port}/v1/responses") as client:
+            client.send(json.dumps({"type": "response.create", "model": "gpt-5.4", "input": "hello", "fast_mode": True}))
+            first = json.loads(client.recv())
+            assistant = json.loads(client.recv())
+            second = json.loads(client.recv())
+            client.send(
+                json.dumps(
+                    {
+                        "type": "response.create",
+                        "model": "gpt-5.4",
+                        "fast_mode": True,
+                        "input": [
+                            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]},
+                            {"type": "message", "role": "assistant", "id": "msg_1", "content": [{"type": "output_text", "text": "assistant output"}]},
+                            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]},
+                        ],
+                    }
+                )
+            )
+            third = json.loads(client.recv())
+            fourth = json.loads(client.recv())
+
+        self.assertEqual(first["type"], "response.created")
+        self.assertEqual(assistant["type"], "response.output_item.done")
+        self.assertEqual(second["type"], "response.completed")
+        self.assertEqual(third["type"], "response.created")
+        self.assertEqual(fourth["type"], "response.completed")
+        outbound = json.loads(fake_upstream.sent[0])
+        self.assertEqual(outbound["model"], "gpt-5.4")
+        self.assertEqual(outbound["service_tier"], "priority")
+        self.assertEqual(outbound["type"], "response.create")
+        self.assertEqual(
+            outbound["input"],
+            [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}],
+        )
+        self.assertIn("prompt_cache_key", outbound)
+        follow_up = json.loads(fake_upstream.sent[1])
+        self.assertEqual(follow_up["previous_response_id"], "resp_ws_1")
+        self.assertEqual(
+            follow_up["input"],
+            [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "second"}]}],
+        )
+
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+    unittest.main()
diff --git a/uv.lock b/uv.lock
new file mode 100644
index 0000000000000000000000000000000000000000..fd1ec988b8e324e8c270b4e4ac303e56a91564a6
--- /dev/null
+++ b/uv.lock
@@ -0,0 +1,621 @@
+version = 1
+revision = 1
+requires-python = ">=3.11"
+
+[[package]]
+name = "altgraph"
+version = "0.17.5"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/f8/97fdf103f38fed6792a1601dbc16cc8aac56e7459a9fff08c812d8ae177a/altgraph-0.17.5.tar.gz", hash = "sha256:c87b395dd12fabde9c99573a9749d67da8d29ef9de0125c7f536699b4a9bc9e7", size = 48428 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a9/ba/000a1996d4308bc65120167c21241a3b205464a2e0b58deda26ae8ac21d1/altgraph-0.17.5-py2.py3-none-any.whl", hash = "sha256:f3a22400bce1b0c701683820ac4f3b159cd301acab067c51c653e06961600597", size = 21228 },
+]
+
+[[package]]
+name = "blinker"
+version = "1.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/28/9b3f50ce0e048515135495f198351908d99540d69bfdc8c1d15b73dc55ce/blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf", size = 22460 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458 },
+]
+
+[[package]]
+name = "certifi"
+version = "2025.8.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/dc/67/960ebe6bf230a96cda2e0abcf73af550ec4f090005363542f0765df162e0/certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407", size = 162386 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e5/48/1549795ba7742c948d2ad169c1c8cdbae65bc450d6cd753d124b17c8cd32/certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5", size = 161216 },
+]
+
+[[package]]
+name = "charset-normalizer"
+version = "3.4.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988 },
+    { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324 },
+    { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742 },
+    { url = "https://files.pythonhosted.org/packages/62/8b/171935adf2312cd745d290ed93cf16cf0dfe320863ab7cbeeae1dcd6535f/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8ef3c867360f88ac904fd3f5e1f902f13307af9052646963ee08ff4f131adafc", size = 160863 },
+    { url = "https://files.pythonhosted.org/packages/09/73/ad875b192bda14f2173bfc1bc9a55e009808484a4b256748d931b6948442/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d9e45d7faa48ee908174d8fe84854479ef838fc6a705c9315372eacbc2f02897", size = 157837 },
+    { url = "https://files.pythonhosted.org/packages/6d/fc/de9cce525b2c5b94b47c70a4b4fb19f871b24995c728e957ee68ab1671ea/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:840c25fb618a231545cbab0564a799f101b63b9901f2569faecd6b222ac72381", size = 151550 },
+    { url = "https://files.pythonhosted.org/packages/55/c2/43edd615fdfba8c6f2dfbd459b25a6b3b551f24ea21981e23fb768503ce1/charset_normalizer-3.4.4-cp311-cp311-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ca5862d5b3928c4940729dacc329aa9102900382fea192fc5e52eb69d6093815", size = 149162 },
+    { url = "https://files.pythonhosted.org/packages/03/86/bde4ad8b4d0e9429a4e82c1e8f5c659993a9a863ad62c7df05cf7b678d75/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d9c7f57c3d666a53421049053eaacdd14bbd0a528e2186fcb2e672effd053bb0", size = 150019 },
+    { url = "https://files.pythonhosted.org/packages/1f/86/a151eb2af293a7e7bac3a739b81072585ce36ccfb4493039f49f1d3cae8c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:277e970e750505ed74c832b4bf75dac7476262ee2a013f5574dd49075879e161", size = 143310 },
+    { url = "https://files.pythonhosted.org/packages/b5/fe/43dae6144a7e07b87478fdfc4dbe9efd5defb0e7ec29f5f58a55aeef7bf7/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:31fd66405eaf47bb62e8cd575dc621c56c668f27d46a61d975a249930dd5e2a4", size = 162022 },
+    { url = "https://files.pythonhosted.org/packages/80/e6/7aab83774f5d2bca81f42ac58d04caf44f0cc2b65fc6db2b3b2e8a05f3b3/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_riscv64.whl", hash = "sha256:0d3d8f15c07f86e9ff82319b3d9ef6f4bf907608f53fe9d92b28ea9ae3d1fd89", size = 149383 },
+    { url = "https://files.pythonhosted.org/packages/4f/e8/b289173b4edae05c0dde07f69f8db476a0b511eac556dfe0d6bda3c43384/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:9f7fcd74d410a36883701fafa2482a6af2ff5ba96b9a620e9e0721e28ead5569", size = 159098 },
+    { url = "https://files.pythonhosted.org/packages/d8/df/fe699727754cae3f8478493c7f45f777b17c3ef0600e28abfec8619eb49c/charset_normalizer-3.4.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ebf3e58c7ec8a8bed6d66a75d7fb37b55e5015b03ceae72a8e7c74495551e224", size = 152991 },
+    { url = "https://files.pythonhosted.org/packages/1a/86/584869fe4ddb6ffa3bd9f491b87a01568797fb9bd8933f557dba9771beaf/charset_normalizer-3.4.4-cp311-cp311-win32.whl", hash = "sha256:eecbc200c7fd5ddb9a7f16c7decb07b566c29fa2161a16cf67b8d068bd21690a", size = 99456 },
+    { url = "https://files.pythonhosted.org/packages/65/f6/62fdd5feb60530f50f7e38b4f6a1d5203f4d16ff4f9f0952962c044e919a/charset_normalizer-3.4.4-cp311-cp311-win_amd64.whl", hash = "sha256:5ae497466c7901d54b639cf42d5b8c1b6a4fead55215500d2f486d34db48d016", size = 106978 },
+    { url = "https://files.pythonhosted.org/packages/7a/9d/0710916e6c82948b3be62d9d398cb4fcf4e97b56d6a6aeccd66c4b2f2bd5/charset_normalizer-3.4.4-cp311-cp311-win_arm64.whl", hash = "sha256:65e2befcd84bc6f37095f5961e68a6f077bf44946771354a28ad434c2cce0ae1", size = 99969 },
+    { url = "https://files.pythonhosted.org/packages/f3/85/1637cd4af66fa687396e757dec650f28025f2a2f5a5531a3208dc0ec43f2/charset_normalizer-3.4.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0a98e6759f854bd25a58a73fa88833fba3b7c491169f86ce1180c948ab3fd394", size = 208425 },
+    { url = "https://files.pythonhosted.org/packages/9d/6a/04130023fef2a0d9c62d0bae2649b69f7b7d8d24ea5536feef50551029df/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b5b290ccc2a263e8d185130284f8501e3e36c5e02750fc6b6bdeb2e9e96f1e25", size = 148162 },
+    { url = "https://files.pythonhosted.org/packages/78/29/62328d79aa60da22c9e0b9a66539feae06ca0f5a4171ac4f7dc285b83688/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74bb723680f9f7a6234dcf67aea57e708ec1fbdf5699fb91dfd6f511b0a320ef", size = 144558 },
+    { url = "https://files.pythonhosted.org/packages/86/bb/b32194a4bf15b88403537c2e120b817c61cd4ecffa9b6876e941c3ee38fe/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f1e34719c6ed0b92f418c7c780480b26b5d9c50349e9a9af7d76bf757530350d", size = 161497 },
+    { url = "https://files.pythonhosted.org/packages/19/89/a54c82b253d5b9b111dc74aca196ba5ccfcca8242d0fb64146d4d3183ff1/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:2437418e20515acec67d86e12bf70056a33abdacb5cb1655042f6538d6b085a8", size = 159240 },
+    { url = "https://files.pythonhosted.org/packages/c0/10/d20b513afe03acc89ec33948320a5544d31f21b05368436d580dec4e234d/charset_normalizer-3.4.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:11d694519d7f29d6cd09f6ac70028dba10f92f6cdd059096db198c283794ac86", size = 153471 },
+    { url = "https://files.pythonhosted.org/packages/61/fa/fbf177b55bdd727010f9c0a3c49eefa1d10f960e5f09d1d887bf93c2e698/charset_normalizer-3.4.4-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:ac1c4a689edcc530fc9d9aa11f5774b9e2f33f9a0c6a57864e90908f5208d30a", size = 150864 },
+    { url = "https://files.pythonhosted.org/packages/05/12/9fbc6a4d39c0198adeebbde20b619790e9236557ca59fc40e0e3cebe6f40/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:21d142cc6c0ec30d2efee5068ca36c128a30b0f2c53c1c07bd78cb6bc1d3be5f", size = 150647 },
+    { url = "https://files.pythonhosted.org/packages/ad/1f/6a9a593d52e3e8c5d2b167daf8c6b968808efb57ef4c210acb907c365bc4/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:5dbe56a36425d26d6cfb40ce79c314a2e4dd6211d51d6d2191c00bed34f354cc", size = 145110 },
+    { url = "https://files.pythonhosted.org/packages/30/42/9a52c609e72471b0fc54386dc63c3781a387bb4fe61c20231a4ebcd58bdd/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5bfbb1b9acf3334612667b61bd3002196fe2a1eb4dd74d247e0f2a4d50ec9bbf", size = 162839 },
+    { url = "https://files.pythonhosted.org/packages/c4/5b/c0682bbf9f11597073052628ddd38344a3d673fda35a36773f7d19344b23/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:d055ec1e26e441f6187acf818b73564e6e6282709e9bcb5b63f5b23068356a15", size = 150667 },
+    { url = "https://files.pythonhosted.org/packages/e4/24/a41afeab6f990cf2daf6cb8c67419b63b48cf518e4f56022230840c9bfb2/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:af2d8c67d8e573d6de5bc30cdb27e9b95e49115cd9baad5ddbd1a6207aaa82a9", size = 160535 },
+    { url = "https://files.pythonhosted.org/packages/2a/e5/6a4ce77ed243c4a50a1fecca6aaaab419628c818a49434be428fe24c9957/charset_normalizer-3.4.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:780236ac706e66881f3b7f2f32dfe90507a09e67d1d454c762cf642e6e1586e0", size = 154816 },
+    { url = "https://files.pythonhosted.org/packages/a8/ef/89297262b8092b312d29cdb2517cb1237e51db8ecef2e9af5edbe7b683b1/charset_normalizer-3.4.4-cp312-cp312-win32.whl", hash = "sha256:5833d2c39d8896e4e19b689ffc198f08ea58116bee26dea51e362ecc7cd3ed26", size = 99694 },
+    { url = "https://files.pythonhosted.org/packages/3d/2d/1e5ed9dd3b3803994c155cd9aacb60c82c331bad84daf75bcb9c91b3295e/charset_normalizer-3.4.4-cp312-cp312-win_amd64.whl", hash = "sha256:a79cfe37875f822425b89a82333404539ae63dbdddf97f84dcbc3d339aae9525", size = 107131 },
+    { url = "https://files.pythonhosted.org/packages/d0/d9/0ed4c7098a861482a7b6a95603edce4c0d9db2311af23da1fb2b75ec26fc/charset_normalizer-3.4.4-cp312-cp312-win_arm64.whl", hash = "sha256:376bec83a63b8021bb5c8ea75e21c4ccb86e7e45ca4eb81146091b56599b80c3", size = 100390 },
+    { url = "https://files.pythonhosted.org/packages/97/45/4b3a1239bbacd321068ea6e7ac28875b03ab8bc0aa0966452db17cd36714/charset_normalizer-3.4.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e1f185f86a6f3403aa2420e815904c67b2f9ebc443f045edd0de921108345794", size = 208091 },
+    { url = "https://files.pythonhosted.org/packages/7d/62/73a6d7450829655a35bb88a88fca7d736f9882a27eacdca2c6d505b57e2e/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b39f987ae8ccdf0d2642338faf2abb1862340facc796048b604ef14919e55ed", size = 147936 },
+    { url = "https://files.pythonhosted.org/packages/89/c5/adb8c8b3d6625bef6d88b251bbb0d95f8205831b987631ab0c8bb5d937c2/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:3162d5d8ce1bb98dd51af660f2121c55d0fa541b46dff7bb9b9f86ea1d87de72", size = 144180 },
+    { url = "https://files.pythonhosted.org/packages/91/ed/9706e4070682d1cc219050b6048bfd293ccf67b3d4f5a4f39207453d4b99/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:81d5eb2a312700f4ecaa977a8235b634ce853200e828fbadf3a9c50bab278328", size = 161346 },
+    { url = "https://files.pythonhosted.org/packages/d5/0d/031f0d95e4972901a2f6f09ef055751805ff541511dc1252ba3ca1f80cf5/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5bd2293095d766545ec1a8f612559f6b40abc0eb18bb2f5d1171872d34036ede", size = 158874 },
+    { url = "https://files.pythonhosted.org/packages/f5/83/6ab5883f57c9c801ce5e5677242328aa45592be8a00644310a008d04f922/charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a8a8b89589086a25749f471e6a900d3f662d1d3b6e2e59dcecf787b1cc3a1894", size = 153076 },
+    { url = "https://files.pythonhosted.org/packages/75/1e/5ff781ddf5260e387d6419959ee89ef13878229732732ee73cdae01800f2/charset_normalizer-3.4.4-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc7637e2f80d8530ee4a78e878bce464f70087ce73cf7c1caf142416923b98f1", size = 150601 },
+    { url = "https://files.pythonhosted.org/packages/d7/57/71be810965493d3510a6ca79b90c19e48696fb1ff964da319334b12677f0/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f8bf04158c6b607d747e93949aa60618b61312fe647a6369f88ce2ff16043490", size = 150376 },
+    { url = "https://files.pythonhosted.org/packages/e5/d5/c3d057a78c181d007014feb7e9f2e65905a6c4ef182c0ddf0de2924edd65/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:554af85e960429cf30784dd47447d5125aaa3b99a6f0683589dbd27e2f45da44", size = 144825 },
+    { url = "https://files.pythonhosted.org/packages/e6/8c/d0406294828d4976f275ffbe66f00266c4b3136b7506941d87c00cab5272/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:74018750915ee7ad843a774364e13a3db91682f26142baddf775342c3f5b1133", size = 162583 },
+    { url = "https://files.pythonhosted.org/packages/d7/24/e2aa1f18c8f15c4c0e932d9287b8609dd30ad56dbe41d926bd846e22fb8d/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:c0463276121fdee9c49b98908b3a89c39be45d86d1dbaa22957e38f6321d4ce3", size = 150366 },
+    { url = "https://files.pythonhosted.org/packages/e4/5b/1e6160c7739aad1e2df054300cc618b06bf784a7a164b0f238360721ab86/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:362d61fd13843997c1c446760ef36f240cf81d3ebf74ac62652aebaf7838561e", size = 160300 },
+    { url = "https://files.pythonhosted.org/packages/7a/10/f882167cd207fbdd743e55534d5d9620e095089d176d55cb22d5322f2afd/charset_normalizer-3.4.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9a26f18905b8dd5d685d6d07b0cdf98a79f3c7a918906af7cc143ea2e164c8bc", size = 154465 },
+    { url = "https://files.pythonhosted.org/packages/89/66/c7a9e1b7429be72123441bfdbaf2bc13faab3f90b933f664db506dea5915/charset_normalizer-3.4.4-cp313-cp313-win32.whl", hash = "sha256:9b35f4c90079ff2e2edc5b26c0c77925e5d2d255c42c74fdb70fb49b172726ac", size = 99404 },
+    { url = "https://files.pythonhosted.org/packages/c4/26/b9924fa27db384bdcd97ab83b4f0a8058d96ad9626ead570674d5e737d90/charset_normalizer-3.4.4-cp313-cp313-win_amd64.whl", hash = "sha256:b435cba5f4f750aa6c0a0d92c541fb79f69a387c91e61f1795227e4ed9cece14", size = 107092 },
+    { url = "https://files.pythonhosted.org/packages/af/8f/3ed4bfa0c0c72a7ca17f0380cd9e4dd842b09f664e780c13cff1dcf2ef1b/charset_normalizer-3.4.4-cp313-cp313-win_arm64.whl", hash = "sha256:542d2cee80be6f80247095cc36c418f7bddd14f4a6de45af91dfad36d817bba2", size = 100408 },
+    { url = "https://files.pythonhosted.org/packages/2a/35/7051599bd493e62411d6ede36fd5af83a38f37c4767b92884df7301db25d/charset_normalizer-3.4.4-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:da3326d9e65ef63a817ecbcc0df6e94463713b754fe293eaa03da99befb9a5bd", size = 207746 },
+    { url = "https://files.pythonhosted.org/packages/10/9a/97c8d48ef10d6cd4fcead2415523221624bf58bcf68a802721a6bc807c8f/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8af65f14dc14a79b924524b1e7fffe304517b2bff5a58bf64f30b98bbc5079eb", size = 147889 },
+    { url = "https://files.pythonhosted.org/packages/10/bf/979224a919a1b606c82bd2c5fa49b5c6d5727aa47b4312bb27b1734f53cd/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:74664978bb272435107de04e36db5a9735e78232b85b77d45cfb38f758efd33e", size = 143641 },
+    { url = "https://files.pythonhosted.org/packages/ba/33/0ad65587441fc730dc7bd90e9716b30b4702dc7b617e6ba4997dc8651495/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:752944c7ffbfdd10c074dc58ec2d5a8a4cd9493b314d367c14d24c17684ddd14", size = 160779 },
+    { url = "https://files.pythonhosted.org/packages/67/ed/331d6b249259ee71ddea93f6f2f0a56cfebd46938bde6fcc6f7b9a3d0e09/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d1f13550535ad8cff21b8d757a3257963e951d96e20ec82ab44bc64aeb62a191", size = 159035 },
+    { url = "https://files.pythonhosted.org/packages/67/ff/f6b948ca32e4f2a4576aa129d8bed61f2e0543bf9f5f2b7fc3758ed005c9/charset_normalizer-3.4.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ecaae4149d99b1c9e7b88bb03e3221956f68fd6d50be2ef061b2381b61d20838", size = 152542 },
+    { url = "https://files.pythonhosted.org/packages/16/85/276033dcbcc369eb176594de22728541a925b2632f9716428c851b149e83/charset_normalizer-3.4.4-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:cb6254dc36b47a990e59e1068afacdcd02958bdcce30bb50cc1700a8b9d624a6", size = 149524 },
+    { url = "https://files.pythonhosted.org/packages/9e/f2/6a2a1f722b6aba37050e626530a46a68f74e63683947a8acff92569f979a/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c8ae8a0f02f57a6e61203a31428fa1d677cbe50c93622b4149d5c0f319c1d19e", size = 150395 },
+    { url = "https://files.pythonhosted.org/packages/60/bb/2186cb2f2bbaea6338cad15ce23a67f9b0672929744381e28b0592676824/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_armv7l.whl", hash = "sha256:47cc91b2f4dd2833fddaedd2893006b0106129d4b94fdb6af1f4ce5a9965577c", size = 143680 },
+    { url = "https://files.pythonhosted.org/packages/7d/a5/bf6f13b772fbb2a90360eb620d52ed8f796f3c5caee8398c3b2eb7b1c60d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:82004af6c302b5d3ab2cfc4cc5f29db16123b1a8417f2e25f9066f91d4411090", size = 162045 },
+    { url = "https://files.pythonhosted.org/packages/df/c5/d1be898bf0dc3ef9030c3825e5d3b83f2c528d207d246cbabe245966808d/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:2b7d8f6c26245217bd2ad053761201e9f9680f8ce52f0fcd8d0755aeae5b2152", size = 149687 },
+    { url = "https://files.pythonhosted.org/packages/a5/42/90c1f7b9341eef50c8a1cb3f098ac43b0508413f33affd762855f67a410e/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:799a7a5e4fb2d5898c60b640fd4981d6a25f1c11790935a44ce38c54e985f828", size = 160014 },
+    { url = "https://files.pythonhosted.org/packages/76/be/4d3ee471e8145d12795ab655ece37baed0929462a86e72372fd25859047c/charset_normalizer-3.4.4-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:99ae2cffebb06e6c22bdc25801d7b30f503cc87dbd283479e7b606f70aff57ec", size = 154044 },
+    { url = "https://files.pythonhosted.org/packages/b0/6f/8f7af07237c34a1defe7defc565a9bc1807762f672c0fde711a4b22bf9c0/charset_normalizer-3.4.4-cp314-cp314-win32.whl", hash = "sha256:f9d332f8c2a2fcbffe1378594431458ddbef721c1769d78e2cbc06280d8155f9", size = 99940 },
+    { url = "https://files.pythonhosted.org/packages/4b/51/8ade005e5ca5b0d80fb4aff72a3775b325bdc3d27408c8113811a7cbe640/charset_normalizer-3.4.4-cp314-cp314-win_amd64.whl", hash = "sha256:8a6562c3700cce886c5be75ade4a5db4214fda19fede41d9792d100288d8f94c", size = 107104 },
+    { url = "https://files.pythonhosted.org/packages/da/5f/6b8f83a55bb8278772c5ae54a577f3099025f9ade59d0136ac24a0df4bde/charset_normalizer-3.4.4-cp314-cp314-win_arm64.whl", hash = "sha256:de00632ca48df9daf77a2c65a484531649261ec9f25489917f09e455cb09ddb2", size = 100743 },
+    { url = "https://files.pythonhosted.org/packages/0a/4c/925909008ed5a988ccbb72dcc897407e5d6d3bd72410d69e051fc0c14647/charset_normalizer-3.4.4-py3-none-any.whl", hash = "sha256:7a32c560861a02ff789ad905a2fe94e3f840803362c84fecf1851cb4cf3dc37f", size = 53402 },
+]
+
+[[package]]
+name = "chatmock"
+source = { editable = "." }
+dependencies = [
+    { name = "blinker" },
+    { name = "certifi" },
+    { name = "flask" },
+    { name = "flask-sock" },
+    { name = "idna" },
+    { name = "itsdangerous" },
+    { name = "jinja2" },
+    { name = "markupsafe" },
+    { name = "requests" },
+    { name = "urllib3" },
+    { name = "websockets" },
+    { name = "werkzeug" },
+]
+
+[package.optional-dependencies]
+gui = [
+    { name = "pillow" },
+    { name = "pyinstaller" },
+    { name = "pyside6" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "blinker", specifier = "==1.9.0" },
+    { name = "certifi", specifier = "==2025.8.3" },
+    { name = "flask", specifier = "==3.1.1" },
+    { name = "flask-sock", specifier = "==0.7.0" },
+    { name = "idna", specifier = "==3.10" },
+    { name = "itsdangerous", specifier = "==2.2.0" },
+    { name = "jinja2", specifier = "==3.1.6" },
+    { name = "markupsafe", specifier = "==3.0.2" },
+    { name = "pillow", marker = "extra == 'gui'", specifier = "==11.3.0" },
+    { name = "pyinstaller", marker = "extra == 'gui'", specifier = "==6.16.0" },
+    { name = "pyside6", marker = "extra == 'gui'", specifier = "==6.9.2" },
+    { name = "requests", specifier = "==2.32.5" },
+    { name = "urllib3", specifier = "==2.5.0" },
+    { name = "websockets", specifier = "==15.0.1" },
+    { name = "werkzeug", specifier = "==3.1.3" },
+]
+provides-extras = ["gui"]
+
+[[package]]
+name = "click"
+version = "8.2.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "colorama", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215 },
+]
+
+[[package]]
+name = "colorama"
+version = "0.4.6"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
+]
+
+[[package]]
+name = "flask"
+version = "3.1.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "blinker" },
+    { name = "click" },
+    { name = "itsdangerous" },
+    { name = "jinja2" },
+    { name = "markupsafe" },
+    { name = "werkzeug" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c0/de/e47735752347f4128bcf354e0da07ef311a78244eba9e3dc1d4a5ab21a98/flask-3.1.1.tar.gz", hash = "sha256:284c7b8f2f58cb737f0cf1c30fd7eaf0ccfcde196099d24ecede3fc2005aa59e", size = 753440 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/3d/68/9d4508e893976286d2ead7f8f571314af6c2037af34853a30fd769c02e9d/flask-3.1.1-py3-none-any.whl", hash = "sha256:07aae2bb5eaf77993ef57e357491839f5fd9f4dc281593a81a9e4d79a24f295c", size = 103305 },
+]
+
+[[package]]
+name = "flask-sock"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "flask" },
+    { name = "simple-websocket" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/8d/8f/c6ab717dc90f4e46d1430335cd4ab13e3629410bb760c0ead6de476760fb/flask-sock-0.7.0.tar.gz", hash = "sha256:e023b578284195a443b8d8bdb4469e6a6acf694b89aeb51315b1a34fcf427b7d", size = 4334 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d8/98/107728ce3f430b5481eb426ccc5e1f7c8ab0bd01eaf231c62a8d528ff721/flask_sock-0.7.0-py3-none-any.whl", hash = "sha256:caac4d679392aaf010d02fabcf73d52019f5bdaf1c9c131ec5a428cb3491204a", size = 3982 },
+]
+
+[[package]]
+name = "h11"
+version = "0.16.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
+]
+
+[[package]]
+name = "idna"
+version = "3.10"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
+]
+
+[[package]]
+name = "itsdangerous"
+version = "2.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/9c/cb/8ac0172223afbccb63986cc25049b154ecfb5e85932587206f42317be31d/itsdangerous-2.2.0.tar.gz", hash = "sha256:e0050c0b7da1eea53ffaf149c0cfbb5c6e2e2b69c4bef22c81fa6eb73e5f6173", size = 54410 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/04/96/92447566d16df59b2a776c0fb82dbc4d9e07cd95062562af01e408583fc4/itsdangerous-2.2.0-py3-none-any.whl", hash = "sha256:c6242fc49e35958c8b15141343aa660db5fc54d4f13a1db01a3f5891b98700ef", size = 16234 },
+]
+
+[[package]]
+name = "jinja2"
+version = "3.1.6"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 },
+]
+
+[[package]]
+name = "macholib"
+version = "1.16.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "altgraph" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/10/2f/97589876ea967487978071c9042518d28b958d87b17dceb7cdc1d881f963/macholib-1.16.4.tar.gz", hash = "sha256:f408c93ab2e995cd2c46e34fe328b130404be143469e41bc366c807448979362", size = 59427 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c7/d1/a9f36f8ecdf0fb7c9b1e78c8d7af12b8c8754e74851ac7b94a8305540fc7/macholib-1.16.4-py2.py3-none-any.whl", hash = "sha256:da1a3fa8266e30f0ce7e97c6a54eefaae8edd1e5f86f3eb8b95457cae90265ea", size = 38117 },
+]
+
+[[package]]
+name = "markupsafe"
+version = "3.0.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 },
+    { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 },
+    { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 },
+    { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120 },
+    { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032 },
+    { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057 },
+    { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359 },
+    { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306 },
+    { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094 },
+    { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521 },
+    { url = "https://files.pythonhosted.org/packages/22/09/d1f21434c97fc42f09d290cbb6350d44eb12f09cc62c9476effdb33a18aa/MarkupSafe-3.0.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9778bd8ab0a994ebf6f84c2b949e65736d5575320a17ae8984a77fab08db94cf", size = 14274 },
+    { url = "https://files.pythonhosted.org/packages/6b/b0/18f76bba336fa5aecf79d45dcd6c806c280ec44538b3c13671d49099fdd0/MarkupSafe-3.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:846ade7b71e3536c4e56b386c2a47adf5741d2d8b94ec9dc3e92e5e1ee1e2225", size = 12348 },
+    { url = "https://files.pythonhosted.org/packages/e0/25/dd5c0f6ac1311e9b40f4af06c78efde0f3b5cbf02502f8ef9501294c425b/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c99d261bd2d5f6b59325c92c73df481e05e57f19837bdca8413b9eac4bd8028", size = 24149 },
+    { url = "https://files.pythonhosted.org/packages/f3/f0/89e7aadfb3749d0f52234a0c8c7867877876e0a20b60e2188e9850794c17/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e17c96c14e19278594aa4841ec148115f9c7615a47382ecb6b82bd8fea3ab0c8", size = 23118 },
+    { url = "https://files.pythonhosted.org/packages/d5/da/f2eeb64c723f5e3777bc081da884b414671982008c47dcc1873d81f625b6/MarkupSafe-3.0.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:88416bd1e65dcea10bc7569faacb2c20ce071dd1f87539ca2ab364bf6231393c", size = 22993 },
+    { url = "https://files.pythonhosted.org/packages/da/0e/1f32af846df486dce7c227fe0f2398dc7e2e51d4a370508281f3c1c5cddc/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2181e67807fc2fa785d0592dc2d6206c019b9502410671cc905d132a92866557", size = 24178 },
+    { url = "https://files.pythonhosted.org/packages/c4/f6/bb3ca0532de8086cbff5f06d137064c8410d10779c4c127e0e47d17c0b71/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:52305740fe773d09cffb16f8ed0427942901f00adedac82ec8b67752f58a1b22", size = 23319 },
+    { url = "https://files.pythonhosted.org/packages/a2/82/8be4c96ffee03c5b4a034e60a31294daf481e12c7c43ab8e34a1453ee48b/MarkupSafe-3.0.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ad10d3ded218f1039f11a75f8091880239651b52e9bb592ca27de44eed242a48", size = 23352 },
+    { url = "https://files.pythonhosted.org/packages/51/ae/97827349d3fcffee7e184bdf7f41cd6b88d9919c80f0263ba7acd1bbcb18/MarkupSafe-3.0.2-cp312-cp312-win32.whl", hash = "sha256:0f4ca02bea9a23221c0182836703cbf8930c5e9454bacce27e767509fa286a30", size = 15097 },
+    { url = "https://files.pythonhosted.org/packages/c1/80/a61f99dc3a936413c3ee4e1eecac96c0da5ed07ad56fd975f1a9da5bc630/MarkupSafe-3.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:8e06879fc22a25ca47312fbe7c8264eb0b662f6db27cb2d3bbbc74b1df4b9b87", size = 15601 },
+    { url = "https://files.pythonhosted.org/packages/83/0e/67eb10a7ecc77a0c2bbe2b0235765b98d164d81600746914bebada795e97/MarkupSafe-3.0.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ba9527cdd4c926ed0760bc301f6728ef34d841f405abf9d4f959c478421e4efd", size = 14274 },
+    { url = "https://files.pythonhosted.org/packages/2b/6d/9409f3684d3335375d04e5f05744dfe7e9f120062c9857df4ab490a1031a/MarkupSafe-3.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f8b3d067f2e40fe93e1ccdd6b2e1d16c43140e76f02fb1319a05cf2b79d99430", size = 12352 },
+    { url = "https://files.pythonhosted.org/packages/d2/f5/6eadfcd3885ea85fe2a7c128315cc1bb7241e1987443d78c8fe712d03091/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:569511d3b58c8791ab4c2e1285575265991e6d8f8700c7be0e88f86cb0672094", size = 24122 },
+    { url = "https://files.pythonhosted.org/packages/0c/91/96cf928db8236f1bfab6ce15ad070dfdd02ed88261c2afafd4b43575e9e9/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:15ab75ef81add55874e7ab7055e9c397312385bd9ced94920f2802310c930396", size = 23085 },
+    { url = "https://files.pythonhosted.org/packages/c2/cf/c9d56af24d56ea04daae7ac0940232d31d5a8354f2b457c6d856b2057d69/MarkupSafe-3.0.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3818cb119498c0678015754eba762e0d61e5b52d34c8b13d770f0719f7b1d79", size = 22978 },
+    { url = "https://files.pythonhosted.org/packages/2a/9f/8619835cd6a711d6272d62abb78c033bda638fdc54c4e7f4272cf1c0962b/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cdb82a876c47801bb54a690c5ae105a46b392ac6099881cdfb9f6e95e4014c6a", size = 24208 },
+    { url = "https://files.pythonhosted.org/packages/f9/bf/176950a1792b2cd2102b8ffeb5133e1ed984547b75db47c25a67d3359f77/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:cabc348d87e913db6ab4aa100f01b08f481097838bdddf7c7a84b7575b7309ca", size = 23357 },
+    { url = "https://files.pythonhosted.org/packages/ce/4f/9a02c1d335caabe5c4efb90e1b6e8ee944aa245c1aaaab8e8a618987d816/MarkupSafe-3.0.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:444dcda765c8a838eaae23112db52f1efaf750daddb2d9ca300bcae1039adc5c", size = 23344 },
+    { url = "https://files.pythonhosted.org/packages/ee/55/c271b57db36f748f0e04a759ace9f8f759ccf22b4960c270c78a394f58be/MarkupSafe-3.0.2-cp313-cp313-win32.whl", hash = "sha256:bcf3e58998965654fdaff38e58584d8937aa3096ab5354d493c77d1fdd66d7a1", size = 15101 },
+    { url = "https://files.pythonhosted.org/packages/29/88/07df22d2dd4df40aba9f3e402e6dc1b8ee86297dddbad4872bd5e7b0094f/MarkupSafe-3.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:e6a2a455bd412959b57a172ce6328d2dd1f01cb2135efda2e4576e8a23fa3b0f", size = 15603 },
+    { url = "https://files.pythonhosted.org/packages/62/6a/8b89d24db2d32d433dffcd6a8779159da109842434f1dd2f6e71f32f738c/MarkupSafe-3.0.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:b5a6b3ada725cea8a5e634536b1b01c30bcdcd7f9c6fff4151548d5bf6b3a36c", size = 14510 },
+    { url = "https://files.pythonhosted.org/packages/7a/06/a10f955f70a2e5a9bf78d11a161029d278eeacbd35ef806c3fd17b13060d/MarkupSafe-3.0.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:a904af0a6162c73e3edcb969eeeb53a63ceeb5d8cf642fade7d39e7963a22ddb", size = 12486 },
+    { url = "https://files.pythonhosted.org/packages/34/cf/65d4a571869a1a9078198ca28f39fba5fbb910f952f9dbc5220afff9f5e6/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa4e5faecf353ed117801a068ebab7b7e09ffb6e1d5e412dc852e0da018126c", size = 25480 },
+    { url = "https://files.pythonhosted.org/packages/0c/e3/90e9651924c430b885468b56b3d597cabf6d72be4b24a0acd1fa0e12af67/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c0ef13eaeee5b615fb07c9a7dadb38eac06a0608b41570d8ade51c56539e509d", size = 23914 },
+    { url = "https://files.pythonhosted.org/packages/66/8c/6c7cf61f95d63bb866db39085150df1f2a5bd3335298f14a66b48e92659c/MarkupSafe-3.0.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d16a81a06776313e817c951135cf7340a3e91e8c1ff2fac444cfd75fffa04afe", size = 23796 },
+    { url = "https://files.pythonhosted.org/packages/bb/35/cbe9238ec3f47ac9a7c8b3df7a808e7cb50fe149dc7039f5f454b3fba218/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:6381026f158fdb7c72a168278597a5e3a5222e83ea18f543112b2662a9b699c5", size = 25473 },
+    { url = "https://files.pythonhosted.org/packages/e6/32/7621a4382488aa283cc05e8984a9c219abad3bca087be9ec77e89939ded9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3d79d162e7be8f996986c064d1c7c817f6df3a77fe3d6859f6f9e7be4b8c213a", size = 24114 },
+    { url = "https://files.pythonhosted.org/packages/0d/80/0985960e4b89922cb5a0bac0ed39c5b96cbc1a536a99f30e8c220a996ed9/MarkupSafe-3.0.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:131a3c7689c85f5ad20f9f6fb1b866f402c445b220c19fe4308c0b147ccd2ad9", size = 24098 },
+    { url = "https://files.pythonhosted.org/packages/82/78/fedb03c7d5380df2427038ec8d973587e90561b2d90cd472ce9254cf348b/MarkupSafe-3.0.2-cp313-cp313t-win32.whl", hash = "sha256:ba8062ed2cf21c07a9e295d5b8a2a5ce678b913b45fdf68c32d95d6c1291e0b6", size = 15208 },
+    { url = "https://files.pythonhosted.org/packages/4f/65/6079a46068dfceaeabb5dcad6d674f5f5c61a6fa5673746f42a9f4c233b3/MarkupSafe-3.0.2-cp313-cp313t-win_amd64.whl", hash = "sha256:e444a31f8db13eb18ada366ab3cf45fd4b31e4db1236a4448f68778c1d1a5a2f", size = 15739 },
+]
+
+[[package]]
+name = "packaging"
+version = "26.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366 },
+]
+
+[[package]]
+name = "pefile"
+version = "2023.2.7"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/78/c5/3b3c62223f72e2360737fd2a57c30e5b2adecd85e70276879609a7403334/pefile-2023.2.7.tar.gz", hash = "sha256:82e6114004b3d6911c77c3953e3838654b04511b8b66e8583db70c65998017dc", size = 74854 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/55/26/d0ad8b448476d0a1e8d3ea5622dc77b916db84c6aa3cb1e1c0965af948fc/pefile-2023.2.7-py3-none-any.whl", hash = "sha256:da185cd2af68c08a6cd4481f7325ed600a88f6a813bad9dea07ab3ef73d8d8d6", size = 71791 },
+]
+
+[[package]]
+name = "pillow"
+version = "11.3.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3d6767bb38a8fc10e33796ba4ba210cbab9354b6d238/pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523", size = 47113069 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/db/26/77f8ed17ca4ffd60e1dcd220a6ec6d71210ba398cfa33a13a1cd614c5613/pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722", size = 5316531 },
+    { url = "https://files.pythonhosted.org/packages/cb/39/ee475903197ce709322a17a866892efb560f57900d9af2e55f86db51b0a5/pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288", size = 4686560 },
+    { url = "https://files.pythonhosted.org/packages/d5/90/442068a160fd179938ba55ec8c97050a612426fae5ec0a764e345839f76d/pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d", size = 5870978 },
+    { url = "https://files.pythonhosted.org/packages/13/92/dcdd147ab02daf405387f0218dcf792dc6dd5b14d2573d40b4caeef01059/pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494", size = 7641168 },
+    { url = "https://files.pythonhosted.org/packages/6e/db/839d6ba7fd38b51af641aa904e2960e7a5644d60ec754c046b7d2aee00e5/pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58", size = 5973053 },
+    { url = "https://files.pythonhosted.org/packages/f2/2f/d7675ecae6c43e9f12aa8d58b6012683b20b6edfbdac7abcb4e6af7a3784/pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f", size = 6640273 },
+    { url = "https://files.pythonhosted.org/packages/45/ad/931694675ede172e15b2ff03c8144a0ddaea1d87adb72bb07655eaffb654/pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e", size = 6082043 },
+    { url = "https://files.pythonhosted.org/packages/3a/04/ba8f2b11fc80d2dd462d7abec16351b45ec99cbbaea4387648a44190351a/pillow-11.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:932c754c2d51ad2b2271fd01c3d121daaa35e27efae2a616f77bf164bc0b3e94", size = 6715516 },
+    { url = "https://files.pythonhosted.org/packages/48/59/8cd06d7f3944cc7d892e8533c56b0acb68399f640786313275faec1e3b6f/pillow-11.3.0-cp311-cp311-win32.whl", hash = "sha256:b4b8f3efc8d530a1544e5962bd6b403d5f7fe8b9e08227c6b255f98ad82b4ba0", size = 6274768 },
+    { url = "https://files.pythonhosted.org/packages/f1/cc/29c0f5d64ab8eae20f3232da8f8571660aa0ab4b8f1331da5c2f5f9a938e/pillow-11.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:1a992e86b0dd7aeb1f053cd506508c0999d710a8f07b4c791c63843fc6a807ac", size = 6986055 },
+    { url = "https://files.pythonhosted.org/packages/c6/df/90bd886fabd544c25addd63e5ca6932c86f2b701d5da6c7839387a076b4a/pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd", size = 2423079 },
+    { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800 },
+    { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296 },
+    { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726 },
+    { url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652 },
+    { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787 },
+    { url = "https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236 },
+    { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950 },
+    { url = "https://files.pythonhosted.org/packages/0b/1a/7cff92e695a2a29ac1958c2a0fe4c0b2393b60aac13b04a4fe2735cad52d/pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d", size = 6723358 },
+    { url = "https://files.pythonhosted.org/packages/26/7d/73699ad77895f69edff76b0f332acc3d497f22f5d75e5360f78cbcaff248/pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149", size = 6275079 },
+    { url = "https://files.pythonhosted.org/packages/8c/ce/e7dfc873bdd9828f3b6e5c2bbb74e47a98ec23cc5c74fc4e54462f0d9204/pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d", size = 6986324 },
+    { url = "https://files.pythonhosted.org/packages/16/8f/b13447d1bf0b1f7467ce7d86f6e6edf66c0ad7cf44cf5c87a37f9bed9936/pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542", size = 2423067 },
+    { url = "https://files.pythonhosted.org/packages/1e/93/0952f2ed8db3a5a4c7a11f91965d6184ebc8cd7cbb7941a260d5f018cd2d/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd", size = 2128328 },
+    { url = "https://files.pythonhosted.org/packages/4b/e8/100c3d114b1a0bf4042f27e0f87d2f25e857e838034e98ca98fe7b8c0a9c/pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8", size = 2170652 },
+    { url = "https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f", size = 2227443 },
+    { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c", size = 5278474 },
+    { url = "https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd", size = 4686038 },
+    { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e", size = 5864407 },
+    { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1", size = 7639094 },
+    { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805", size = 5973503 },
+    { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8", size = 6642574 },
+    { url = "https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2", size = 6084060 },
+    { url = "https://files.pythonhosted.org/packages/d5/ea/502d938cbaeec836ac28a9b730193716f0114c41325db428e6b280513f09/pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b", size = 6721407 },
+    { url = "https://files.pythonhosted.org/packages/45/9c/9c5e2a73f125f6cbc59cc7087c8f2d649a7ae453f83bd0362ff7c9e2aee2/pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3", size = 6273841 },
+    { url = "https://files.pythonhosted.org/packages/23/85/397c73524e0cd212067e0c969aa245b01d50183439550d24d9f55781b776/pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51", size = 6978450 },
+    { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580", size = 2423055 },
+    { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e", size = 5281110 },
+    { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d", size = 4689547 },
+    { url = "https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced", size = 5901554 },
+    { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c", size = 7669132 },
+    { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8", size = 6005001 },
+    { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59", size = 6668814 },
+    { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe", size = 6113124 },
+    { url = "https://files.pythonhosted.org/packages/cd/5a/6fec59b1dfb619234f7636d4157d11fb4e196caeee220232a8d2ec48488d/pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c", size = 6747186 },
+    { url = "https://files.pythonhosted.org/packages/49/6b/00187a044f98255225f172de653941e61da37104a9ea60e4f6887717e2b5/pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788", size = 6277546 },
+    { url = "https://files.pythonhosted.org/packages/e8/5c/6caaba7e261c0d75bab23be79f1d06b5ad2a2ae49f028ccec801b0e853d6/pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31", size = 6985102 },
+    { url = "https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e", size = 2424803 },
+    { url = "https://files.pythonhosted.org/packages/73/f4/04905af42837292ed86cb1b1dabe03dce1edc008ef14c473c5c7e1443c5d/pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12", size = 5278520 },
+    { url = "https://files.pythonhosted.org/packages/41/b0/33d79e377a336247df6348a54e6d2a2b85d644ca202555e3faa0cf811ecc/pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a", size = 4686116 },
+    { url = "https://files.pythonhosted.org/packages/49/2d/ed8bc0ab219ae8768f529597d9509d184fe8a6c4741a6864fea334d25f3f/pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632", size = 5864597 },
+    { url = "https://files.pythonhosted.org/packages/b5/3d/b932bb4225c80b58dfadaca9d42d08d0b7064d2d1791b6a237f87f661834/pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673", size = 7638246 },
+    { url = "https://files.pythonhosted.org/packages/09/b5/0487044b7c096f1b48f0d7ad416472c02e0e4bf6919541b111efd3cae690/pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027", size = 5973336 },
+    { url = "https://files.pythonhosted.org/packages/a8/2d/524f9318f6cbfcc79fbc004801ea6b607ec3f843977652fdee4857a7568b/pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77", size = 6642699 },
+    { url = "https://files.pythonhosted.org/packages/6f/d2/a9a4f280c6aefedce1e8f615baaa5474e0701d86dd6f1dede66726462bbd/pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874", size = 6083789 },
+    { url = "https://files.pythonhosted.org/packages/fe/54/86b0cd9dbb683a9d5e960b66c7379e821a19be4ac5810e2e5a715c09a0c0/pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a", size = 6720386 },
+    { url = "https://files.pythonhosted.org/packages/e7/95/88efcaf384c3588e24259c4203b909cbe3e3c2d887af9e938c2022c9dd48/pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214", size = 6370911 },
+    { url = "https://files.pythonhosted.org/packages/2e/cc/934e5820850ec5eb107e7b1a72dd278140731c669f396110ebc326f2a503/pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635", size = 7117383 },
+    { url = "https://files.pythonhosted.org/packages/d6/e9/9c0a616a71da2a5d163aa37405e8aced9a906d574b4a214bede134e731bc/pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6", size = 2511385 },
+    { url = "https://files.pythonhosted.org/packages/1a/33/c88376898aff369658b225262cd4f2659b13e8178e7534df9e6e1fa289f6/pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae", size = 5281129 },
+    { url = "https://files.pythonhosted.org/packages/1f/70/d376247fb36f1844b42910911c83a02d5544ebd2a8bad9efcc0f707ea774/pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653", size = 4689580 },
+    { url = "https://files.pythonhosted.org/packages/eb/1c/537e930496149fbac69efd2fc4329035bbe2e5475b4165439e3be9cb183b/pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6", size = 5902860 },
+    { url = "https://files.pythonhosted.org/packages/bd/57/80f53264954dcefeebcf9dae6e3eb1daea1b488f0be8b8fef12f79a3eb10/pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36", size = 7670694 },
+    { url = "https://files.pythonhosted.org/packages/70/ff/4727d3b71a8578b4587d9c276e90efad2d6fe0335fd76742a6da08132e8c/pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b", size = 6005888 },
+    { url = "https://files.pythonhosted.org/packages/05/ae/716592277934f85d3be51d7256f3636672d7b1abfafdc42cf3f8cbd4b4c8/pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477", size = 6670330 },
+    { url = "https://files.pythonhosted.org/packages/e7/bb/7fe6cddcc8827b01b1a9766f5fdeb7418680744f9082035bdbabecf1d57f/pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50", size = 6114089 },
+    { url = "https://files.pythonhosted.org/packages/8b/f5/06bfaa444c8e80f1a8e4bff98da9c83b37b5be3b1deaa43d27a0db37ef84/pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b", size = 6748206 },
+    { url = "https://files.pythonhosted.org/packages/f0/77/bc6f92a3e8e6e46c0ca78abfffec0037845800ea38c73483760362804c41/pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12", size = 6377370 },
+    { url = "https://files.pythonhosted.org/packages/4a/82/3a721f7d69dca802befb8af08b7c79ebcab461007ce1c18bd91a5d5896f9/pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db", size = 7121500 },
+    { url = "https://files.pythonhosted.org/packages/89/c7/5572fa4a3f45740eaab6ae86fcdf7195b55beac1371ac8c619d880cfe948/pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa", size = 2512835 },
+    { url = "https://files.pythonhosted.org/packages/9e/e3/6fa84033758276fb31da12e5fb66ad747ae83b93c67af17f8c6ff4cc8f34/pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6", size = 5270566 },
+    { url = "https://files.pythonhosted.org/packages/5b/ee/e8d2e1ab4892970b561e1ba96cbd59c0d28cf66737fc44abb2aec3795a4e/pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438", size = 4654618 },
+    { url = "https://files.pythonhosted.org/packages/f2/6d/17f80f4e1f0761f02160fc433abd4109fa1548dcfdca46cfdadaf9efa565/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3", size = 4874248 },
+    { url = "https://files.pythonhosted.org/packages/de/5f/c22340acd61cef960130585bbe2120e2fd8434c214802f07e8c03596b17e/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c", size = 6583963 },
+    { url = "https://files.pythonhosted.org/packages/31/5e/03966aedfbfcbb4d5f8aa042452d3361f325b963ebbadddac05b122e47dd/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361", size = 4957170 },
+    { url = "https://files.pythonhosted.org/packages/cc/2d/e082982aacc927fc2cab48e1e731bdb1643a1406acace8bed0900a61464e/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7", size = 5581505 },
+    { url = "https://files.pythonhosted.org/packages/34/e7/ae39f538fd6844e982063c3a5e4598b8ced43b9633baa3a85ef33af8c05c/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8", size = 6984598 },
+]
+
+[[package]]
+name = "pyinstaller"
+version = "6.16.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "altgraph" },
+    { name = "macholib", marker = "sys_platform == 'darwin'" },
+    { name = "packaging" },
+    { name = "pefile", marker = "sys_platform == 'win32'" },
+    { name = "pyinstaller-hooks-contrib" },
+    { name = "pywin32-ctypes", marker = "sys_platform == 'win32'" },
+    { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/94/94/1f62e95e4a28b64cfbb5b922ef3046f968b47170d37a1e1a029f56ac9cb4/pyinstaller-6.16.0.tar.gz", hash = "sha256:53559fe1e041a234f2b4dcc3288ea8bdd57f7cad8a6644e422c27bb407f3edef", size = 4008473 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/7b/0a/c42ce6e5d3de287f2e9432a074fb209f1fb72a86a72f3903849fdb5e4829/pyinstaller-6.16.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:7fd1c785219a87ca747c21fa92f561b0d2926a7edc06d0a0fe37f3736e00bd7a", size = 1027899 },
+    { url = "https://files.pythonhosted.org/packages/4e/d0/f18fedde32835d5a758f464c75924e2154065625f09d5456c3c303527654/pyinstaller-6.16.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:b756ddb9007b8141c5476b553351f9d97559b8af5d07f9460869bfae02be26b0", size = 727990 },
+    { url = "https://files.pythonhosted.org/packages/7a/db/c8bb47514ce857b24bf9294cf1ff74844b6a489fa0ab4ef6f923288c4e38/pyinstaller-6.16.0-py3-none-manylinux2014_i686.whl", hash = "sha256:0a48f55b85ff60f83169e10050f2759019cf1d06773ad1c4da3a411cd8751058", size = 739238 },
+    { url = "https://files.pythonhosted.org/packages/c6/3e/451dc784a8fcca0fe9f9b6b802d58555364a95b60f253613a2c83fc6b023/pyinstaller-6.16.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:73ba72e04fcece92e32518bbb1e1fb5ac2892677943dfdff38e01a06e8742851", size = 737142 },
+    { url = "https://files.pythonhosted.org/packages/71/37/2f457479ef8fa2821cdb448acee2421dfb19fbe908bf5499d1930c164084/pyinstaller-6.16.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:b1752488248f7899281b17ca3238eefb5410521291371a686a4f5830f29f52b3", size = 734133 },
+    { url = "https://files.pythonhosted.org/packages/63/c4/0f7daac4d062a4d1ac2571d8a8b9b5d6812094fcd914d139af591ca5e1ba/pyinstaller-6.16.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ba618a61627ee674d6d68e5de084ba17c707b59a4f2a856084b3999bdffbd3f0", size = 733817 },
+    { url = "https://files.pythonhosted.org/packages/11/e4/b6127265b42bef883e8873d850becadf748bc5652e5a7029b059328f3c31/pyinstaller-6.16.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:c8b7ef536711617e12fef4673806198872033fa06fa92326ad7fd1d84a9fa454", size = 732912 },
+    { url = "https://files.pythonhosted.org/packages/2b/00/c6663107bdf814b2916e71563beabd09f693c47712213bc228994cb2cc65/pyinstaller-6.16.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:d1ebf84d02c51fed19b82a8abb4df536923abd55bb684d694e1356e4ae2a0ce5", size = 732773 },
+    { url = "https://files.pythonhosted.org/packages/a3/14/cabe9bc5f60b95d2e70e7d045ab94b0015ff8f6c8b16e2142d3597e30749/pyinstaller-6.16.0-py3-none-win32.whl", hash = "sha256:6d5f8617f3650ff9ef893e2ab4ddbf3c0d23d0c602ef74b5df8fbef4607840c8", size = 1313878 },
+    { url = "https://files.pythonhosted.org/packages/aa/99/2005efbc297e7813c1d6f18484aa94a1a81ce87b6a5b497c563681f4c4ea/pyinstaller-6.16.0-py3-none-win_amd64.whl", hash = "sha256:bc10eb1a787f99fea613509f55b902fbd2d8b73ff5f51ff245ea29a481d97d41", size = 1374706 },
+    { url = "https://files.pythonhosted.org/packages/ca/f4/4dfcf69b86d60fcaae05a42bbff1616d48a91e71726e5ed795d773dae9b3/pyinstaller-6.16.0-py3-none-win_arm64.whl", hash = "sha256:d0af8a401de792c233c32c44b16d065ca9ab8262ee0c906835c12bdebc992a64", size = 1315923 },
+]
+
+[[package]]
+name = "pyinstaller-hooks-contrib"
+version = "2026.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "packaging" },
+    { name = "setuptools" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/80/17/716326f6ba18d0663f7995ae369c23e50efebc22fbb054e9710a45688f61/pyinstaller_hooks_contrib-2026.3.tar.gz", hash = "sha256:800d3a198a49a6cd0de2d7fb795005fdca7a0222ed9cb47c0691abd1c27b9310", size = 172323 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ed/19/781352446af28755f16ce52b2d97f7a6f2d7974ac34c00ca5cd8c40c9098/pyinstaller_hooks_contrib-2026.3-py3-none-any.whl", hash = "sha256:5ecd1068ad262afecadf07556279d2be52ca93a88b049fae17f1a2eb2969254a", size = 454625 },
+]
+
+[[package]]
+name = "pyside6"
+version = "6.9.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyside6-addons" },
+    { name = "pyside6-essentials" },
+    { name = "shiboken6" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/43/42/43577413bd5ab26f5f21e7a43c9396aac158a5d01900c87e4609c0e96278/pyside6-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:71245c76bfbe5c41794ffd8546730ec7cc869d4bbe68535639e026e4ef8a7714", size = 558102 },
+    { url = "https://files.pythonhosted.org/packages/12/df/cb84f802df3dcc1d196d2f9f37dbb8227761826f936987c9386b8ae1ffcc/pyside6-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:64a9e2146e207d858e00226f68d7c1b4ab332954742a00dcabb721bb9e4aa0cd", size = 558243 },
+    { url = "https://files.pythonhosted.org/packages/94/2d/715db9da437b4632d06e2c4718aee9937760b84cf36c23d5441989e581b0/pyside6-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:a78fad16241a1f2ed0fa0098cf3d621f591fc75b4badb7f3fa3959c9d861c806", size = 558245 },
+    { url = "https://files.pythonhosted.org/packages/59/90/2e75cbff0e17f16b83d2b7e8434ae9175cae8d6ff816c9b56d307cf53c86/pyside6-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:d1afbf48f9a5612b9ee2dc7c384c1a65c08b5830ba5e7d01f66d82678e5459df", size = 564604 },
+    { url = "https://files.pythonhosted.org/packages/dc/34/e3dd4e046673efcbcfbe0aa2760df06b2877739b8f4da60f0229379adebd/pyside6-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:1499b1d7629ab92119118e2636b4ace836b25e457ddf01003fdca560560b8c0a", size = 401833 },
+]
+
+[[package]]
+name = "pyside6-addons"
+version = "6.9.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pyside6-essentials" },
+    { name = "shiboken6" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/47/39/a8f4a55001b6a0aaee042e706de2447f21c6dc2a610f3d3debb7d04db821/pyside6_addons-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:7019fdcc0059626eb1608b361371f4dc8cb7f2d02f066908fd460739ff5a07cd", size = 316693692 },
+    { url = "https://files.pythonhosted.org/packages/14/48/0b16e9dabd4cafe02d59531832bc30b6f0e14c92076e90dd02379d365cb2/pyside6_addons-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:24350e5415317f269e743d1f7b4933fe5f59d90894aa067676c9ce6bfe9e7988", size = 166984613 },
+    { url = "https://files.pythonhosted.org/packages/f4/55/dc42a73387379bae82f921b7659cd2006ec0e80f7052f83ddc07e9eb9cca/pyside6_addons-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:af8dee517de8d336735a6543f7dd496eb580e852c14b4d2304b890e2a29de499", size = 162908466 },
+    { url = "https://files.pythonhosted.org/packages/14/fa/396a2e86230c493b565e2dc89dc64e4b1c63582ac69afe77b693c3817a53/pyside6_addons-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:98d2413904ee4b2b754b077af7875fa6ec08468c01a6628a2c9c3d2cece4874f", size = 160216647 },
+    { url = "https://files.pythonhosted.org/packages/a7/fe/25f61259f1d5ec4648c9f6d2abd8e2cba2188f10735a57abafda719958e5/pyside6_addons-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:b430cae782ff1a99fb95868043557f22c31b30c94afb9cf73278584e220a2ab6", size = 27126649 },
+]
+
+[[package]]
+name = "pyside6-essentials"
+version = "6.9.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "shiboken6" },
+]
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/08/21/41960c03721a99e7be99a96ebb8570bdfd6f76f512b5d09074365e27ce28/pyside6_essentials-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:713eb8dcbb016ff10e6fca129c1bf2a0fd8cfac979e689264e0be3b332f9398e", size = 133092348 },
+    { url = "https://files.pythonhosted.org/packages/3e/02/e38ff18f3d2d8d3071aa6823031aad6089267aa4668181db65ce9948bfc0/pyside6_essentials-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:84b8ca4fa56506e2848bdb4c7a0851a5e7adcb916bef9bce25ce2eeb6c7002cc", size = 96569791 },
+    { url = "https://files.pythonhosted.org/packages/9a/a1/1203d4db6919b42a937d9ac5ddb84b20ea42eb119f7c1ddeb77cb8fdb00c/pyside6_essentials-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:d0f701503974bd51b408966539aa6956f3d8536e547ea8002fbfb3d77796bbc3", size = 94311809 },
+    { url = "https://files.pythonhosted.org/packages/a8/e3/3b3e869d3e332b6db93f6f64fac3b12f5c48b84f03f2aa50ee5c044ec0de/pyside6_essentials-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:b2f746f795138ac63eb173f9850a6db293461a1b6ce22cf6dafac7d194a38951", size = 72624566 },
+    { url = "https://files.pythonhosted.org/packages/91/70/db78afc8b60b2e53f99145bde2f644cca43924a4dd869ffe664e0792730a/pyside6_essentials-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:ecd7b5cd9e271f397fb89a6357f4ec301d8163e50869c6c557f9ccc6bed42789", size = 49561720 },
+]
+
+[[package]]
+name = "pywin32-ctypes"
+version = "0.2.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756 },
+]
+
+[[package]]
+name = "requests"
+version = "2.32.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "certifi" },
+    { name = "charset-normalizer" },
+    { name = "idna" },
+    { name = "urllib3" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c9/74/b3ff8e6c8446842c3f5c837e9c3dfcfe2018ea6ecef224c710c85ef728f4/requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf", size = 134517 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738 },
+]
+
+[[package]]
+name = "setuptools"
+version = "82.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/4f/db/cfac1baf10650ab4d1c111714410d2fbb77ac5a616db26775db562c8fab2/setuptools-82.0.1.tar.gz", hash = "sha256:7d872682c5d01cfde07da7bccc7b65469d3dca203318515ada1de5eda35efbf9", size = 1152316 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9d/76/f789f7a86709c6b087c5a2f52f911838cad707cc613162401badc665acfe/setuptools-82.0.1-py3-none-any.whl", hash = "sha256:a59e362652f08dcd477c78bb6e7bd9d80a7995bc73ce773050228a348ce2e5bb", size = 1006223 },
+]
+
+[[package]]
+name = "shiboken6"
+version = "6.9.2"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/1a/1e/62a8757aa0aa8d5dbf876f6cb6f652a60be9852e7911b59269dd983a7fb5/shiboken6-6.9.2-cp39-abi3-macosx_12_0_universal2.whl", hash = "sha256:8bb1c4326330e53adeac98bfd9dcf57f5173a50318a180938dcc4825d9ca38da", size = 406337 },
+    { url = "https://files.pythonhosted.org/packages/3b/bb/72a8ed0f0542d9ea935f385b396ee6a4bbd94749c817cbf2be34e80a16d3/shiboken6-6.9.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3b54c0a12ea1b03b9dc5dcfb603c366e957dc75341bf7cb1cc436d0d848308ee", size = 206733 },
+    { url = "https://files.pythonhosted.org/packages/52/c4/09e902f5612a509cef2c8712c516e4fe44f3a1ae9fcd8921baddb5e6bae4/shiboken6-6.9.2-cp39-abi3-manylinux_2_39_aarch64.whl", hash = "sha256:a5f5985938f5acb604c23536a0ff2efb3cccb77d23da91fbaff8fd8ded3dceb4", size = 202784 },
+    { url = "https://files.pythonhosted.org/packages/a4/ea/a56b094a4bf6facf89f52f58e83684e168b1be08c14feb8b99969f3d4189/shiboken6-6.9.2-cp39-abi3-win_amd64.whl", hash = "sha256:68c33d565cd4732be762d19ff67dfc53763256bac413d392aa8598b524980bc4", size = 1152089 },
+    { url = "https://files.pythonhosted.org/packages/48/64/562a527fc55fbf41fa70dae735929988215505cb5ec0809fb0aef921d4a0/shiboken6-6.9.2-cp39-abi3-win_arm64.whl", hash = "sha256:c5b827797b3d89d9b9a3753371ff533fcd4afc4531ca51a7c696952132098054", size = 1708948 },
+]
+
+[[package]]
+name = "simple-websocket"
+version = "1.1.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "wsproto" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/b0/d4/bfa032f961103eba93de583b161f0e6a5b63cebb8f2c7d0c6e6efe1e3d2e/simple_websocket-1.1.0.tar.gz", hash = "sha256:7939234e7aa067c534abdab3a9ed933ec9ce4691b0713c78acb195560aa52ae4", size = 17300 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/52/59/0782e51887ac6b07ffd1570e0364cf901ebc36345fea669969d2084baebb/simple_websocket-1.1.0-py3-none-any.whl", hash = "sha256:4af6069630a38ed6c561010f0e11a5bc0d4ca569b36306eb257cd9a192497c8c", size = 13842 },
+]
+
+[[package]]
+name = "urllib3"
+version = "2.5.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/15/22/9ee70a2574a4f4599c47dd506532914ce044817c7752a79b6a51286319bc/urllib3-2.5.0.tar.gz", hash = "sha256:3fc47733c7e419d4bc3f6b3dc2b4f890bb743906a30d56ba4a5bfa4bbff92760", size = 393185 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a7/c2/fe1e52489ae3122415c51f387e221dd0773709bad6c6cdaa599e8a2c5185/urllib3-2.5.0-py3-none-any.whl", hash = "sha256:e6b01673c0fa6a13e374b50871808eb3bf7046c4b125b216f6bf1cc604cff0dc", size = 129795 },
+]
+
+[[package]]
+name = "websockets"
+version = "15.0.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423 },
+    { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082 },
+    { url = "https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330 },
+    { url = "https://files.pythonhosted.org/packages/a5/90/1c37ae8b8a113d3daf1065222b6af61cc44102da95388ac0018fcb7d93d9/websockets-15.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d99e5546bf73dbad5bf3547174cd6cb8ba7273062a23808ffea025ecb1cf8562", size = 182878 },
+    { url = "https://files.pythonhosted.org/packages/8e/8d/96e8e288b2a41dffafb78e8904ea7367ee4f891dafc2ab8d87e2124cb3d3/websockets-15.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66dd88c918e3287efc22409d426c8f729688d89a0c587c88971a0faa2c2f3792", size = 181883 },
+    { url = "https://files.pythonhosted.org/packages/93/1f/5d6dbf551766308f6f50f8baf8e9860be6182911e8106da7a7f73785f4c4/websockets-15.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dd8327c795b3e3f219760fa603dcae1dcc148172290a8ab15158cf85a953413", size = 182252 },
+    { url = "https://files.pythonhosted.org/packages/d4/78/2d4fed9123e6620cbf1706c0de8a1632e1a28e7774d94346d7de1bba2ca3/websockets-15.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8fdc51055e6ff4adeb88d58a11042ec9a5eae317a0a53d12c062c8a8865909e8", size = 182521 },
+    { url = "https://files.pythonhosted.org/packages/e7/3b/66d4c1b444dd1a9823c4a81f50231b921bab54eee2f69e70319b4e21f1ca/websockets-15.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:693f0192126df6c2327cce3baa7c06f2a117575e32ab2308f7f8216c29d9e2e3", size = 181958 },
+    { url = "https://files.pythonhosted.org/packages/08/ff/e9eed2ee5fed6f76fdd6032ca5cd38c57ca9661430bb3d5fb2872dc8703c/websockets-15.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:54479983bd5fb469c38f2f5c7e3a24f9a4e70594cd68cd1fa6b9340dadaff7cf", size = 181918 },
+    { url = "https://files.pythonhosted.org/packages/d8/75/994634a49b7e12532be6a42103597b71098fd25900f7437d6055ed39930a/websockets-15.0.1-cp311-cp311-win32.whl", hash = "sha256:16b6c1b3e57799b9d38427dda63edcbe4926352c47cf88588c0be4ace18dac85", size = 176388 },
+    { url = "https://files.pythonhosted.org/packages/98/93/e36c73f78400a65f5e236cd376713c34182e6663f6889cd45a4a04d8f203/websockets-15.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:27ccee0071a0e75d22cb35849b1db43f2ecd3e161041ac1ee9d2352ddf72f065", size = 176828 },
+    { url = "https://files.pythonhosted.org/packages/51/6b/4545a0d843594f5d0771e86463606a3988b5a09ca5123136f8a76580dd63/websockets-15.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3e90baa811a5d73f3ca0bcbf32064d663ed81318ab225ee4f427ad4e26e5aff3", size = 175437 },
+    { url = "https://files.pythonhosted.org/packages/f4/71/809a0f5f6a06522af902e0f2ea2757f71ead94610010cf570ab5c98e99ed/websockets-15.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:592f1a9fe869c778694f0aa806ba0374e97648ab57936f092fd9d87f8bc03665", size = 173096 },
+    { url = "https://files.pythonhosted.org/packages/3d/69/1a681dd6f02180916f116894181eab8b2e25b31e484c5d0eae637ec01f7c/websockets-15.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0701bc3cfcb9164d04a14b149fd74be7347a530ad3bbf15ab2c678a2cd3dd9a2", size = 173332 },
+    { url = "https://files.pythonhosted.org/packages/a6/02/0073b3952f5bce97eafbb35757f8d0d54812b6174ed8dd952aa08429bcc3/websockets-15.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e8b56bdcdb4505c8078cb6c7157d9811a85790f2f2b3632c7d1462ab5783d215", size = 183152 },
+    { url = "https://files.pythonhosted.org/packages/74/45/c205c8480eafd114b428284840da0b1be9ffd0e4f87338dc95dc6ff961a1/websockets-15.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0af68c55afbd5f07986df82831c7bff04846928ea8d1fd7f30052638788bc9b5", size = 182096 },
+    { url = "https://files.pythonhosted.org/packages/14/8f/aa61f528fba38578ec553c145857a181384c72b98156f858ca5c8e82d9d3/websockets-15.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:64dee438fed052b52e4f98f76c5790513235efaa1ef7f3f2192c392cd7c91b65", size = 182523 },
+    { url = "https://files.pythonhosted.org/packages/ec/6d/0267396610add5bc0d0d3e77f546d4cd287200804fe02323797de77dbce9/websockets-15.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d5f6b181bb38171a8ad1d6aa58a67a6aa9d4b38d0f8c5f496b9e42561dfc62fe", size = 182790 },
+    { url = "https://files.pythonhosted.org/packages/02/05/c68c5adbf679cf610ae2f74a9b871ae84564462955d991178f95a1ddb7dd/websockets-15.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5d54b09eba2bada6011aea5375542a157637b91029687eb4fdb2dab11059c1b4", size = 182165 },
+    { url = "https://files.pythonhosted.org/packages/29/93/bb672df7b2f5faac89761cb5fa34f5cec45a4026c383a4b5761c6cea5c16/websockets-15.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3be571a8b5afed347da347bfcf27ba12b069d9d7f42cb8c7028b5e98bbb12597", size = 182160 },
+    { url = "https://files.pythonhosted.org/packages/ff/83/de1f7709376dc3ca9b7eeb4b9a07b4526b14876b6d372a4dc62312bebee0/websockets-15.0.1-cp312-cp312-win32.whl", hash = "sha256:c338ffa0520bdb12fbc527265235639fb76e7bc7faafbb93f6ba80d9c06578a9", size = 176395 },
+    { url = "https://files.pythonhosted.org/packages/7d/71/abf2ebc3bbfa40f391ce1428c7168fb20582d0ff57019b69ea20fa698043/websockets-15.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:fcd5cf9e305d7b8338754470cf69cf81f420459dbae8a3b40cee57417f4614a7", size = 176841 },
+    { url = "https://files.pythonhosted.org/packages/cb/9f/51f0cf64471a9d2b4d0fc6c534f323b664e7095640c34562f5182e5a7195/websockets-15.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee443ef070bb3b6ed74514f5efaa37a252af57c90eb33b956d35c8e9c10a1931", size = 175440 },
+    { url = "https://files.pythonhosted.org/packages/8a/05/aa116ec9943c718905997412c5989f7ed671bc0188ee2ba89520e8765d7b/websockets-15.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:5a939de6b7b4e18ca683218320fc67ea886038265fd1ed30173f5ce3f8e85675", size = 173098 },
+    { url = "https://files.pythonhosted.org/packages/ff/0b/33cef55ff24f2d92924923c99926dcce78e7bd922d649467f0eda8368923/websockets-15.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:746ee8dba912cd6fc889a8147168991d50ed70447bf18bcda7039f7d2e3d9151", size = 173329 },
+    { url = "https://files.pythonhosted.org/packages/31/1d/063b25dcc01faa8fada1469bdf769de3768b7044eac9d41f734fd7b6ad6d/websockets-15.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:595b6c3969023ecf9041b2936ac3827e4623bfa3ccf007575f04c5a6aa318c22", size = 183111 },
+    { url = "https://files.pythonhosted.org/packages/93/53/9a87ee494a51bf63e4ec9241c1ccc4f7c2f45fff85d5bde2ff74fcb68b9e/websockets-15.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c714d2fc58b5ca3e285461a4cc0c9a66bd0e24c5da9911e30158286c9b5be7f", size = 182054 },
+    { url = "https://files.pythonhosted.org/packages/ff/b2/83a6ddf56cdcbad4e3d841fcc55d6ba7d19aeb89c50f24dd7e859ec0805f/websockets-15.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f3c1e2ab208db911594ae5b4f79addeb3501604a165019dd221c0bdcabe4db8", size = 182496 },
+    { url = "https://files.pythonhosted.org/packages/98/41/e7038944ed0abf34c45aa4635ba28136f06052e08fc2168520bb8b25149f/websockets-15.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:229cf1d3ca6c1804400b0a9790dc66528e08a6a1feec0d5040e8b9eb14422375", size = 182829 },
+    { url = "https://files.pythonhosted.org/packages/e0/17/de15b6158680c7623c6ef0db361da965ab25d813ae54fcfeae2e5b9ef910/websockets-15.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:756c56e867a90fb00177d530dca4b097dd753cde348448a1012ed6c5131f8b7d", size = 182217 },
+    { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195 },
+    { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393 },
+    { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837 },
+    { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 169743 },
+]
+
+[[package]]
+name = "werkzeug"
+version = "3.1.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/9f/69/83029f1f6300c5fb2471d621ab06f6ec6b3324685a2ce0f9777fd4a8b71e/werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746", size = 806925 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/52/24/ab44c871b0f07f491e5d2ad12c9bd7358e527510618cb1b803a88e986db1/werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e", size = 224498 },
+]
+
+[[package]]
+name = "wsproto"
+version = "1.3.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "h11" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c7/79/12135bdf8b9c9367b8701c2c19a14c913c120b882d50b014ca0d38083c2c/wsproto-1.3.2.tar.gz", hash = "sha256:b86885dcf294e15204919950f666e06ffc6c7c114ca900b060d6e16293528294", size = 50116 }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/a4/f5/10b68b7b1544245097b2a1b8238f66f2fc6dcaeb24ba5d917f52bd2eed4f/wsproto-1.3.2-py3-none-any.whl", hash = "sha256:61eea322cdf56e8cc904bd3ad7573359a242ba65688716b0710a5eb12beab584", size = 24405 },
+]