Spaces:

aiqknow
/

CheckMat

Sleeping

App Files Files Community

aiqknow commited on 17 days ago

Commit

35205e8

verified ·

1 Parent(s): a088295

Upload 97 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.README.md.swp +0 -0
.env.example +26 -0
.github/workflows/ci.yml +20 -0
.github/workflows/release.yml +229 -0
.gitignore +21 -0
CONTRIBUTING.md +37 -0
DOCKER.md +41 -0
Dockerfile +20 -0
LICENSE +21 -0
README.md +209 -10
build.py +225 -0
build/lib/chatmock/__init__.py +5 -0
build/lib/chatmock/app.py +56 -0
build/lib/chatmock/cli.py +425 -0
build/lib/chatmock/config.py +48 -0
build/lib/chatmock/fast_mode.py +92 -0
build/lib/chatmock/http.py +24 -0
build/lib/chatmock/limits.py +200 -0
build/lib/chatmock/model_registry.py +198 -0
build/lib/chatmock/models.py +26 -0
build/lib/chatmock/oauth.py +340 -0
build/lib/chatmock/prompt.md +1 -0
build/lib/chatmock/prompt_gpt5_codex.md +1 -0
build/lib/chatmock/reasoning.py +79 -0
build/lib/chatmock/responses_api.py +243 -0
build/lib/chatmock/routes_ollama.py +585 -0
build/lib/chatmock/routes_openai.py +738 -0
build/lib/chatmock/session.py +312 -0
build/lib/chatmock/transform.py +149 -0
build/lib/chatmock/upstream.py +181 -0
build/lib/chatmock/utils.py +874 -0
build/lib/chatmock/version.py +4 -0
build/lib/chatmock/websocket_routes.py +225 -0
chatmock.egg-info/PKG-INFO +200 -0
chatmock.egg-info/SOURCES.txt +34 -0
chatmock.egg-info/dependency_links.txt +1 -0
chatmock.egg-info/entry_points.txt +2 -0
chatmock.egg-info/requires.txt +17 -0
chatmock.egg-info/top_level.txt +1 -0
chatmock.py +7 -0
chatmock/__init__.py +5 -0
chatmock/__pycache__/__init__.cpython-314.pyc +0 -0
chatmock/__pycache__/app.cpython-314.pyc +0 -0
chatmock/__pycache__/cli.cpython-314.pyc +0 -0
chatmock/__pycache__/config.cpython-314.pyc +0 -0
chatmock/__pycache__/fast_mode.cpython-314.pyc +0 -0
chatmock/__pycache__/http.cpython-314.pyc +0 -0
chatmock/__pycache__/limits.cpython-314.pyc +0 -0
chatmock/__pycache__/model_registry.cpython-314.pyc +0 -0
chatmock/__pycache__/models.cpython-314.pyc +0 -0

.README.md.swp ADDED Viewed

Binary file (12.3 kB). View file

.env.example ADDED Viewed

	@@ -0,0 +1,26 @@

+# Port
+PORT=8000
+# Image
+CHATMOCK_IMAGE=storagetime/chatmock:latest
+# Auth dir
+CHATGPT_LOCAL_HOME=/data
+# show request/stream logs
+VERBOSE=false
+# OAuth client id (modify only if you know what you're doing)
+# CHATGPT_LOCAL_CLIENT_ID=app_EMoamEEZ73f0CkXaXp7hrann
+# Reasoning controls
+CHATGPT_LOCAL_REASONING_EFFORT=medium       # none|minimal|low|medium|high|xhigh
+CHATGPT_LOCAL_REASONING_SUMMARY=auto        # auto|concise|detailed|none
+CHATGPT_LOCAL_REASONING_COMPAT=think-tags   # legacy|o3|think-tags|current
+CHATGPT_LOCAL_EXPOSE_REASONING_MODELS=false
+# Enable default web search tool
+CHATGPT_LOCAL_ENABLE_WEB_SEARCH=false
+# Force a specific model name
+# CHATGPT_LOCAL_DEBUG_MODEL=gpt-5.4

.github/workflows/ci.yml ADDED Viewed

	@@ -0,0 +1,20 @@

+# Continuous integration: install the package, run the unit tests, and check
+# that the distribution builds.
+name: ci
+on:
+  # Runs for pushes to main and for every pull request.
+  push:
+    branches:
+      - main
+  pull_request:
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          # Quoted so YAML keeps the version as a string rather than a float.
+          python-version: "3.11"
+      - uses: astral-sh/setup-uv@v5
+      # Install the project into the runner's system interpreter.
+      - run: uv pip install --system .
+      # Discover and run the tests under the tests/ directory.
+      - run: python -m unittest discover -s tests
+      # Build the distribution artifacts with uv.
+      - run: uv build

.github/workflows/release.yml ADDED Viewed

	@@ -0,0 +1,229 @@

+name: release
+on:
+  push:
+    tags:
+      - "v*"
+permissions:
+  contents: write
+jobs:
+  validate:
+    runs-on: ubuntu-latest
+    outputs:
+      version: ${{ steps.version.outputs.version }}
+      tag: ${{ steps.version.outputs.tag }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - id: version
+        run: |
+          VERSION="${GITHUB_REF_NAME#v}"
+          PACKAGE_VERSION="$(python - <<'PY'
+          import runpy
+          print(runpy.run_path("chatmock/version.py")["__version__"])
+          PY
+          )"
+          if [ "$VERSION" != "$PACKAGE_VERSION" ]; then
+            echo "Tag version $VERSION does not match package version $PACKAGE_VERSION" >&2
+            exit 1
+          fi
+          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
+          echo "tag=${GITHUB_REF_NAME}" >> "$GITHUB_OUTPUT"
+      - uses: astral-sh/setup-uv@v5
+      - run: uv pip install --system .
+      - run: python -m unittest discover -s tests
+  build-python:
+    needs: validate
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - uses: astral-sh/setup-uv@v5
+      - run: uv build
+      - uses: actions/upload-artifact@v4
+        with:
+          name: python-dist
+          path: dist/*
+  publish-pypi:
+    needs:
+      - validate
+      - build-python
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          name: python-dist
+          path: dist
+      - uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          packages-dir: dist
+  build-windows:
+    needs: validate
+    runs-on: windows-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - run: python -m pip install --upgrade pip
+      - run: python -m pip install ".[gui]"
+      - run: python build.py --name ChatMock
+      - run: Compress-Archive -Path dist/ChatMock -DestinationPath dist/ChatMock-windows.zip
+        shell: pwsh
+      - uses: actions/upload-artifact@v4
+        with:
+          name: windows-gui
+          path: dist/ChatMock-windows.zip
+  build-macos:
+    needs: validate
+    runs-on: macos-latest
+    env:
+      APPLE_CERTIFICATE_P12_BASE64: ${{ secrets.APPLE_CERTIFICATE_P12_BASE64 }}
+      APPLE_CERTIFICATE_PASSWORD: ${{ secrets.APPLE_CERTIFICATE_PASSWORD }}
+      APPLE_SIGNING_IDENTITY: ${{ secrets.APPLE_SIGNING_IDENTITY }}
+      APPLE_ID: ${{ secrets.APPLE_ID }}
+      APPLE_APP_SPECIFIC_PASSWORD: ${{ secrets.APPLE_APP_SPECIFIC_PASSWORD }}
+      APPLE_TEAM_ID: ${{ secrets.APPLE_TEAM_ID }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - run: python -m pip install --upgrade pip
+      - run: python -m pip install ".[gui]"
+      - run: |
+          security create-keychain -p "$RUNNER_TEMP" build.keychain
+          security default-keychain -s build.keychain
+          security unlock-keychain -p "$RUNNER_TEMP" build.keychain
+          security set-keychain-settings -lut 21600 build.keychain
+          python - <<'PY'
+          import base64
+          import os
+          from pathlib import Path
+          data = os.environ["APPLE_CERTIFICATE_P12_BASE64"]
+          Path(os.environ["RUNNER_TEMP"], "chatmock-signing.p12").write_bytes(base64.b64decode(data))
+          PY
+          security import "$RUNNER_TEMP/chatmock-signing.p12" -k build.keychain -P "$APPLE_CERTIFICATE_PASSWORD" -T /usr/bin/codesign -T /usr/bin/security
+          security set-key-partition-list -S apple-tool:,apple:,codesign: -s -k "$RUNNER_TEMP" build.keychain
+      - run: python build.py --name ChatMock
+      - run: codesign --force --deep --options runtime --sign "$APPLE_SIGNING_IDENTITY" dist/ChatMock.app
+      - run: codesign --verify --deep --strict dist/ChatMock.app
+      - run: python build.py --name ChatMock --dmg-only
+      - run: codesign --force --sign "$APPLE_SIGNING_IDENTITY" dist/ChatMock.dmg
+      - run: codesign --verify --strict dist/ChatMock.dmg
+      - run: xcrun notarytool submit dist/ChatMock.dmg --apple-id "$APPLE_ID" --password "$APPLE_APP_SPECIFIC_PASSWORD" --team-id "$APPLE_TEAM_ID" --wait
+      - run: xcrun stapler staple dist/ChatMock.dmg
+      - run: xcrun stapler validate dist/ChatMock.dmg
+      - uses: actions/upload-artifact@v4
+        with:
+          name: macos-gui
+          path: dist/ChatMock.dmg
+  docker:
+    needs: validate
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: docker/setup-qemu-action@v3
+      - uses: docker/setup-buildx-action@v3
+      - uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: storagetime/chatmock
+          tags: |
+            type=raw,value=latest
+            type=raw,value=${{ needs.validate.outputs.tag }}
+            type=raw,value=${{ needs.validate.outputs.version }}
+      - uses: docker/build-push-action@v6
+        with:
+          context: .
+          platforms: linux/amd64,linux/arm64
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+  # Regenerates the Homebrew formula in the tap repository for the tagged release.
+  homebrew:
+    needs: validate
+    runs-on: ubuntu-latest
+    steps:
+      # The tap token is handed to the script through an environment variable
+      # instead of being interpolated into the script text.
+      - env:
+          HOMEBREW_TAP_TOKEN: ${{ secrets.HOMEBREW_TAP_TOKEN }}
+        run: |
+          # The default run shell has no pipefail: without this a failed curl
+          # would still produce the SHA-256 of empty input and a broken formula.
+          set -euo pipefail
+          ARCHIVE_URL="https://github.com/${GITHUB_REPOSITORY}/archive/refs/tags/${GITHUB_REF_NAME}.tar.gz"
+          SHA256="$(curl -fsSL "$ARCHIVE_URL" | shasum -a 256 | awk '{print $1}')"
+          git clone "https://x-access-token:${HOMEBREW_TAP_TOKEN}@github.com/RayBytes/homebrew-chatmock.git" tap
+          cd tap
+          cat <<EOF > chatmock.rb
+          class Chatmock < Formula
+            include Language::Python::Virtualenv
+            desc "OpenAI & Ollama compatible API powered by your ChatGPT plan"
+            homepage "https://github.com/RayBytes/ChatMock"
+            url "${ARCHIVE_URL}"
+            sha256 "${SHA256}"
+            license "MIT"
+            head "https://github.com/RayBytes/ChatMock.git", branch: "main"
+            depends_on "python@3.11"
+            def install
+              virtualenv_create(libexec, "python3.11")
+              system libexec/"bin/pip", "install", "."
+              bin.install_symlink libexec/"bin/chatmock"
+            end
+            def caveats
+              <<~EOS
+                To get started with ChatMock:
+                  chatmock login
+                  chatmock serve
+              EOS
+            end
+            test do
+              output = shell_output("#{bin}/chatmock --help 2>&1")
+              assert_match "ChatMock", output
+            end
+          end
+          EOF
+          git config user.name "github-actions[bot]"
+          git config user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git add chatmock.rb
+          # Nothing to commit means the formula is already current; skip the push.
+          git commit -m "chatmock ${GITHUB_REF_NAME}" || exit 0
+          git push
+  release-assets:
+    needs:
+      - validate
+      - build-python
+      - build-windows
+      - build-macos
+      - publish-pypi
+      - docker
+      - homebrew
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v4
+        with:
+          path: release-artifacts
+      - run: find release-artifacts -type f | sort
+      - uses: softprops/action-gh-release@v2
+        with:
+          files: |
+            release-artifacts/python-dist/*
+            release-artifacts/windows-gui/*
+            release-artifacts/macos-gui/*

.gitignore ADDED Viewed

	@@ -0,0 +1,21 @@

+# Python bytecode
+__pycache__/
+*.py[cod]
+*$py.class
+# Virtual environments
+.env/
+.venv/
+venv/
+# Packaging artifacts
+build/
+dist/
+*.egg-info/
+# Tool caches
+.pytest_cache/
+.mypy_cache/
+# OS clutter
+.DS_Store

CONTRIBUTING.md ADDED Viewed

	@@ -0,0 +1,37 @@

+# Contributing to ChatMock
+We welcome thoughtful improvements. This guide calls out the expectations that keep reviews quick and the project stable.
+# How should I contribute?
+### Before changing code...
+- Open an issue before large or risky efforts so scope is agreed up front.
+- Keep pull requests focused and easy to follow & break sweeping changes into a series when possible.
+- Treat documentation, code, and packaging (CLI, Docker, GUI) as a single surface (your updates should apply to all).
+### Getting Set Up
+- Review the Quickstart section in README.md
+- Read through the codebase and make sure you understand how it currently works.
+- Confirm you can log in and serve a local instance, then make a couple of sample requests to understand current behaviour so you know if it broke later on.
+### Working With Core Files
+- `prompt.md` and related Codex harness files are sensitive. Do not modify them or move entry points without prior maintainer approval.
+- Be cautious with parameter names, response payload shapes, and file locations consumed by downstream clients. Coordinate before changing them.
+- When touching shared logic, update both OpenAI and Ollama routes, plus any CLI/GUI code that depends on the same behaviour.
+## Designing Features and Fixes
+- Prefer opt-in flags or config switches for new capabilities & leave defaults unchanged until maintainers confirm the rollout plan.
+- Document any limits or external dependencies introduced by your change.
+- Validate compatibility with popular clients (e.g. Jan, Raycast, custom OpenAI SDKs) when responses or streaming formats shift.
+# Pull Request Checklist
+- [ ] Rebased on the latest `main` and issue reference included when applicable.
+- [ ] Manual verification steps captured under "How to try locally" in the PR body.
+- [ ] README.md, DOCKER.md, and other docs updated—or explicitly noted as not required.
+- [ ] No generated artefacts or caches staged (`build/`, `dist/`, `__pycache__/`, `.pytest_cache/`, etc.).
+- [ ] Critical paths (`prompt.md`, routing modules, public parameter names) reviewed for unintended edits and discussed with maintainers if changes were necessary.
+## Need Help?
+- If you're not sure about scope, flags, or how to implement a certain feature, always create an issue beforehand.
+Thank you for your contribution!

DOCKER.md ADDED Viewed

	@@ -0,0 +1,41 @@

+# Docker Deployment
+## Quick Start
+1) Setup env:
+   cp .env.example .env
+2) Login:
+   docker compose run --rm --service-ports chatmock-login login
+   - The command prints an auth URL, copy paste it into your browser.
+   - If your browser cannot reach the container's localhost callback, copy the full redirect URL from the browser address bar and paste it back into the terminal when prompted.
+   - Server should stop automatically once it receives the tokens and they are saved.
+3) Start the server:
+   docker compose up -d chatmock
+4) Feel free to use it in whichever chat app you like!
+## Configuration
+Set options in `.env` or pass environment variables:
+- `PORT`: Container listening port (default 8000)
+- `CHATMOCK_IMAGE`: image tag to run (default `storagetime/chatmock:latest`)
+- `VERBOSE`: `true|false` to enable request/stream logs
+- `CHATGPT_LOCAL_REASONING_EFFORT`: none|minimal|low|medium|high|xhigh
+- `CHATGPT_LOCAL_REASONING_SUMMARY`: auto|concise|detailed|none
+- `CHATGPT_LOCAL_REASONING_COMPAT`: legacy|o3|think-tags|current
+- `CHATGPT_LOCAL_FAST_MODE`: `true|false` to enable fast mode by default for supported models
+- `CHATGPT_LOCAL_CLIENT_ID`: OAuth client id override (rarely needed)
+- `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS`: `true|false` to add reasoning model variants to `/v1/models`
+- `CHATGPT_LOCAL_ENABLE_WEB_SEARCH`: `true|false` to enable default web search tool
+## Logs
+Set `VERBOSE=true` to include extra logging for troubleshooting upstream or chat app requests. Please include and use these logs when submitting bug reports.
+## Test
+```
+curl -s http://localhost:8000/v1/chat/completions \
+   -H 'Content-Type: application/json' \
+   -d '{"model":"gpt-5-codex","messages":[{"role":"user","content":"Hello world!"}]}' | jq .
+```

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+FROM python:3.11-slim
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1
+WORKDIR /app
+COPY pyproject.toml README.md chatmock.py prompt.md prompt_gpt5_codex.md /app/
+COPY chatmock /app/chatmock
+RUN pip install --no-cache-dir .
+RUN mkdir -p /data
+COPY docker/entrypoint.sh /entrypoint.sh
+RUN chmod +x /entrypoint.sh
+EXPOSE 7860 1455
+ENTRYPOINT ["/entrypoint.sh"]
+CMD ["serve"]

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 Game_Time
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,10 +1,209 @@
----
-title: CheckMat
-emoji: 💻
-colorFrom: indigo
-colorTo: indigo
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+<div align="center">
+# ChatMock
+**Allows Codex to work in your favourite chat apps and coding tools.**
+[![PyPI](https://img.shields.io/pypi/v/chatmock?color=blue&label=pypi)](https://pypi.org/project/chatmock/)
+[![Python](https://img.shields.io/pypi/pyversions/chatmock)](https://pypi.org/project/chatmock/)
+[![License](https://img.shields.io/github/license/RayBytes/ChatMock)](LICENSE)
+[![Stars](https://img.shields.io/github/stars/RayBytes/ChatMock?style=flat)](https://github.com/RayBytes/ChatMock/stargazers)
+[![Last Commit](https://img.shields.io/github/last-commit/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/commits/main)
+[![Issues](https://img.shields.io/github/issues/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/issues)
+<br>
+</div>
+<br>
+## Install
+#### Homebrew
+```bash
+brew tap RayBytes/chatmock
+brew install chatmock
+```
+#### pipx / pip
+```bash
+pipx install chatmock
+```
+#### GUI
+Download from [releases](https://github.com/RayBytes/ChatMock/releases) (macOS & Windows)
+#### Docker
+See [DOCKER.md](DOCKER.md)
+#### Hugging Face
+See [Hugging Face Deployment](#hugging-face-deployment)
+<br>
+## Getting Started
+```bash
+# 1. Sign in with your ChatGPT account
+chatmock login
+# 2. Start the server
+chatmock serve
+```
+The server runs at `http://127.0.0.1:8000` by default. Use `http://127.0.0.1:8000/v1` as your base URL for OpenAI-compatible apps.
+<br>
+## Usage
+<details open>
+<summary><b>Python</b></summary>
+```python
+from openai import OpenAI
+client = OpenAI(
+    base_url="http://127.0.0.1:8000/v1",
+    api_key="anything"  # not checked
+)
+response = client.chat.completions.create(
+    model="gpt-5.4",
+    messages=[{"role": "user", "content": "hello"}]
+)
+print(response.choices[0].message.content)
+```
+</details>
+<details>
+<summary><b>cURL</b></summary>
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5.4",
+    "messages": [{"role": "user", "content": "hello"}]
+  }'
+```
+</details>
+<details>
+<summary><b>Custom API (Plain Text)</b></summary>
+```bash
+# Request format: {"prompt": "..."}
+# Response format: {"status": "success", "text": "..."}
+curl http://127.0.0.1:8000/api \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "hello"}'
+# You can also specify the model in the URL
+curl http://127.0.0.1:8000/gpt-5.5/api \
+  -H "Content-Type: application/json" \
+  -d '{"prompt": "hello"}'
+```
+</details>
+<br>
+## Supported Models
+- `gpt-5.5`
+- `gpt-5.4`
+- `gpt-5.4-mini`
+- `gpt-5.2`
+- `gpt-5.1`
+- `gpt-5`
+- `gpt-5.3-codex`
+- `gpt-5.3-codex-spark`
+- `gpt-5.2-codex`
+- `gpt-5-codex`
+- `gpt-5.1-codex`
+- `gpt-5.1-codex-max`
+- `gpt-5.1-codex-mini`
+- `codex-mini`
+<br>
+## Features
+- Tool / function calling
+- Vision / image input
+- Thinking summaries (via think tags)
+- Configurable thinking effort
+- Fast mode for supported models
+- Web search tool
+- OpenAI-compatible `/v1/responses` (HTTP + WebSocket)
+- Ollama-compatible endpoints
+- Reasoning effort exposed as separate models (optional)
+<br>
+## Configuration
+All flags go after `chatmock serve`. These can also be set as environment variables.
+| Flag | Env var | Options | Default | Description |
+|------|---------|---------|---------|-------------|
+| `--reasoning-effort` | `CHATGPT_LOCAL_REASONING_EFFORT` | none, minimal, low, medium, high, xhigh | medium | How hard the model thinks |
+| `--reasoning-summary` | `CHATGPT_LOCAL_REASONING_SUMMARY` | auto, concise, detailed, none | auto | Thinking summary verbosity |
+| `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags, current | think-tags | How reasoning is returned to the client |
+| `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
+| `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
+| `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
+<details>
+<summary><b>Web search in a request</b></summary>
+```json
+{
+  "model": "gpt-5.4",
+  "messages": [{"role": "user", "content": "latest news on ..."}],
+  "responses_tools": [{"type": "web_search"}],
+  "responses_tool_choice": "auto"
+}
+```
+</details>
+<details>
+<summary><b>Fast mode in a request</b></summary>
+```json
+{
+  "model": "gpt-5.4",
+  "input": "summarize this",
+  "fast_mode": true
+}
+```
+</details>
+<br>
+## Notes
+Use responsibly and at your own risk. This project is not affiliated with OpenAI.
+<br>
+## Hugging Face Deployment
+1. **Get Auth**: Run `python chatmock.py info --json` locally and copy the output.
+2. **Create Space**: Create a new **Docker** Space on Hugging Face.
+3. **Upload**: Upload all project files to the Space.
+4. **Secret**: In Space Settings, add a secret named `AUTH_JSON` and paste your auth data as the value.
+5. **Done**: Your API will be available at `https://<user>-<space>.hf.space/api`
+<br>
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=RayBytes/ChatMock&type=Timeline)](https://www.star-history.com/#RayBytes/ChatMock&Timeline)

build.py ADDED Viewed

	@@ -0,0 +1,225 @@

+from __future__ import annotations
+import argparse
+import os
+import platform
+import shutil
+import subprocess
+import sys
+from pathlib import Path
+import plistlib
+from PIL import Image
+ROOT = Path(__file__).parent.resolve()
+BUILD_DIR = ROOT / "build"
+ICONS_DIR = BUILD_DIR / "icons"
+def info(msg: str) -> None:
+    print(f"[build] {msg}")
+def ensure_dirs() -> None:
+    ICONS_DIR.mkdir(parents=True, exist_ok=True)
+def load_icon_png(path: Path) -> Image.Image:
+    if Image is None:
+        raise RuntimeError("Pillow is required to process icons.")
+    img = Image.open(path).convert("RGBA")
+    size = max(img.width, img.height)
+    canvas = Image.new("RGBA", (size, size), (0, 0, 0, 0))
+    x = (size - img.width) // 2
+    y = (size - img.height) // 2
+    canvas.paste(img, (x, y))
+    return canvas
+def rounded(img: Image.Image, radius_ratio: float = 0.22) -> Image.Image:
+    if Image is None:
+        return img
+    w, h = img.size
+    r = int(min(w, h) * max(0.0, min(radius_ratio, 0.5)))
+    if r <= 0:
+        return img
+    mask = Image.new("L", (w, h), 0)
+    from PIL import ImageDraw
+    d = ImageDraw.Draw(mask)
+    d.rounded_rectangle((0, 0, w, h), radius=r, fill=255)
+    out = img.copy()
+    out.putalpha(mask)
+    return out
+def make_windows_ico(src_png: Path, out_ico: Path, radius_ratio: float) -> Path:
+    info("Generating Windows .ico")
+    square = load_icon_png(src_png)
+    sizes = [16, 24, 32, 48, 64, 128, 256]
+    images = [rounded(square.resize((s, s), Image.LANCZOS), radius_ratio) for s in sizes]
+    images[0].save(out_ico, format="ICO", sizes=[(s, s) for s in sizes])
+    return out_ico
+def make_macos_icns(src_png: Path, out_icns: Path, radius_ratio: float) -> Path:
+    info("Generating macOS .icns")
+    iconset = BUILD_DIR / "icon.iconset"
+    if iconset.exists():
+        shutil.rmtree(iconset)
+    iconset.mkdir(parents=True, exist_ok=True)
+    square = load_icon_png(src_png)
+    sizes = [16, 32, 64, 128, 256, 512, 1024]
+    mapping = {
+        16:  ["icon_16x16.png", "icon_32x32.png"],
+        32:  ["icon_16x16@2x.png"],
+        64:  ["icon_32x32@2x.png"],
+        128: ["icon_128x128.png", "icon_256x256.png"],
+        256: ["icon_128x128@2x.png"],
+        512: ["icon_512x512.png"],
+        1024:["icon_512x512@2x.png"],
+    }
+    for s in sizes:
+        img = rounded(square.resize((s, s), Image.LANCZOS), radius_ratio)
+        for name in mapping.get(s, []):
+            img.save(iconset / name, format="PNG")
+    try:
+        subprocess.run(["iconutil", "-c", "icns", str(iconset), "-o", str(out_icns)], check=True)
+    except Exception as e:
+        raise RuntimeError("Failed to create .icns. Ensure Xcode command line tools are installed (iconutil).\n"
+                           f"Details: {e}")
+    finally:
+        shutil.rmtree(iconset, ignore_errors=True)
+    return out_icns
+def pyinstaller_add_data_arg(src: Path, dest: str) -> str:
+    sep = ";" if os.name == "nt" else ":"
+    return f"{src}{sep}{dest}"
+def run_pyinstaller(entry: Path, name: str, icon: Path | None, extra_data: list[tuple[Path, str]], bundle_id: str | None = None) -> None:
+    cmd = [
+        sys.executable, "-m", "PyInstaller",
+        "--windowed", "--noconfirm",
+        "--name", name,
+    ]
+    if bundle_id and platform.system().lower() == "darwin":
+        cmd += ["--osx-bundle-identifier", bundle_id]
+    if icon is not None:
+        cmd += ["--icon", str(icon)]
+    for (src, dest) in extra_data:
+        cmd += ["--add-data", pyinstaller_add_data_arg(src, dest)]
+    cmd.append(str(entry))
+    info("Running: " + " ".join(cmd))
+    subprocess.run(cmd, check=True)
+def patch_macos_plist(app_path: Path, bundle_id: str, icon_base_name: str = "appicon") -> None:
+    info("Patching macOS Info.plist")
+    plist_path = app_path / "Contents" / "Info.plist"
+    if not plist_path.exists():
+        info(f"No Info.plist at {plist_path}, skipping patch")
+        return
+    with plist_path.open("rb") as f:
+        data = plistlib.load(f)
+    data["CFBundleIdentifier"] = bundle_id
+    data["CFBundleName"] = data.get("CFBundleName") or app_path.stem
+    data["CFBundleDisplayName"] = data.get("CFBundleDisplayName") or app_path.stem
+    data["CFBundleIconFile"] = icon_base_name
+    data["CFBundleIconName"] = icon_base_name
+    with plist_path.open("wb") as f:
+        plistlib.dump(data, f)
+def make_dmg(app_path: Path, dmg_path: Path, volume_name: str) -> None:
+    info("Creating DMG")
+    staging = BUILD_DIR / "dmg_staging"
+    if staging.exists():
+        shutil.rmtree(staging)
+    (staging).mkdir(parents=True, exist_ok=True)
+    shutil.rmtree(staging / app_path.name, ignore_errors=True)
+    shutil.copytree(app_path, staging / app_path.name, symlinks=True)
+    try:
+        os.symlink("/Applications", staging / "Applications")
+    except FileExistsError:
+        pass
+    dmg_path.parent.mkdir(parents=True, exist_ok=True)
+    subprocess.run([
+        "hdiutil", "create", "-volname", volume_name,
+        "-srcfolder", str(staging),
+        "-format", "UDZO",
+        "-imagekey", "zlib-level=9",
+        str(dmg_path)
+    ], check=True)
+    shutil.rmtree(staging, ignore_errors=True)
+def main() -> None:
+    """Command-line entry point: build the GUI bundle for the current OS.
+    Generates a platform-specific icon, runs PyInstaller on the entry script,
+    and on macOS patches ``Info.plist`` and optionally creates a DMG. With
+    ``--dmg-only`` it only packages an already-built ``dist/<name>.app``.
+    Raises:
+        SystemExit: if the app (``--dmg-only``), entry script or icon PNG is missing.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--name", default="ChatMock")
+    parser.add_argument("--entry", default="gui.py")
+    parser.add_argument("--icon", default="icon.png")
+    parser.add_argument("--radius", type=float, default=0.22)
+    parser.add_argument("--square", action="store_true")
+    parser.add_argument("--dmg", action="store_true")
+    parser.add_argument("--dmg-only", action="store_true")
+    args = parser.parse_args()
+    ensure_dirs()
+    # --entry and --icon are resolved relative to this script's directory.
+    entry = ROOT / args.entry
+    icon_src = ROOT / args.icon
+    # --dmg-only skips icon generation and PyInstaller entirely.
+    if args.dmg_only:
+        app_path = ROOT / "dist" / f"{args.name}.app"
+        if not app_path.exists():
+            raise SystemExit(f"App not found: {app_path}")
+        dmg = ROOT / "dist" / f"{args.name}.dmg"
+        make_dmg(app_path, dmg, args.name)
+        return
+    if not entry.exists():
+        raise SystemExit(f"Entry not found: {entry}")
+    if not icon_src.exists():
+        raise SystemExit(f"Icon PNG not found: {icon_src}")
+    os_name = platform.system().lower()
+    # Files bundled next to the executable via --add-data.
+    extra_data: list[tuple[Path, str]] = [
+        (ROOT / "prompt.md", "."),
+        (ROOT / "prompt_gpt5_codex.md", "."),
+    ]
+    bundle_icon: Path | None = None
+    # --square disables corner rounding by forcing a zero radius ratio.
+    rr = 0.0 if args.square else float(args.radius)
+    if os_name == "windows":
+        ico = ICONS_DIR / "appicon.ico"
+        make_windows_ico(icon_src, ico, rr)
+        bundle_icon = ico
+        extra_data.append((ico, "."))
+    elif os_name == "darwin":
+        icns = ICONS_DIR / "appicon.icns"
+        make_macos_icns(icon_src, icns, rr)
+        bundle_icon = icns
+        extra_data.append((icns, "."))
+    else:
+        # Other platforms: ship a 512x512 PNG as data; no --icon is passed.
+        png_copy = ICONS_DIR / "appicon.png"
+        if Image is not None:
+            square = load_icon_png(icon_src).resize((512, 512), Image.LANCZOS)
+            square = rounded(square, rr) if rr > 0 else square
+            square.save(png_copy)
+        else:
+            shutil.copy2(icon_src, png_copy)
+        extra_data.append((png_copy, "."))
+    run_pyinstaller(entry, args.name, bundle_icon, extra_data)
+    if os_name == "darwin":
+        app_path = ROOT / "dist" / f"{args.name}.app"
+        if app_path.exists():
+            # The bundle identifier is applied by patching the plist after the build.
+            bid = "com.chatmock.app"
+            patch_macos_plist(app_path, bundle_id=bid, icon_base_name="appicon")
+            if args.dmg:
+                dmg = ROOT / "dist" / f"{args.name}.dmg"
+                make_dmg(app_path, dmg, args.name)
+if __name__ == "__main__":
+    main()

build/lib/chatmock/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from __future__ import annotations
+from .app import create_app
+from .cli import main
+from .version import __version__

build/lib/chatmock/app.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from __future__ import annotations
+from flask import Flask, jsonify
+from flask_sock import Sock
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .http import build_cors_headers
+from .routes_openai import openai_bp
+from .routes_ollama import ollama_bp
+from .websocket_routes import register_websocket_routes
+def create_app(
+    verbose: bool = False,
+    verbose_obfuscation: bool = False,
+    reasoning_effort: str = "medium",
+    reasoning_summary: str = "auto",
+    reasoning_compat: str = "think-tags",
+    fast_mode: bool = False,
+    debug_model: str | None = None,
+    expose_reasoning_models: bool = False,
+    default_web_search: bool = False,
+) -> Flask:
+    app = Flask(__name__)
+    app.config.update(
+        VERBOSE=bool(verbose),
+        VERBOSE_OBFUSCATION=bool(verbose_obfuscation),
+        REASONING_EFFORT=reasoning_effort,
+        REASONING_SUMMARY=reasoning_summary,
+        REASONING_COMPAT=reasoning_compat,
+        FAST_MODE=bool(fast_mode),
+        DEBUG_MODEL=debug_model,
+        BASE_INSTRUCTIONS=BASE_INSTRUCTIONS,
+        GPT5_CODEX_INSTRUCTIONS=GPT5_CODEX_INSTRUCTIONS,
+        EXPOSE_REASONING_MODELS=bool(expose_reasoning_models),
+        DEFAULT_WEB_SEARCH=bool(default_web_search),
+    )
+    @app.get("/")
+    @app.get("/health")
+    def health():
+        return jsonify({"status": "ok"})
+    @app.after_request
+    def _cors(resp):
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    app.register_blueprint(openai_bp)
+    app.register_blueprint(ollama_bp)
+    sock = Sock(app)
+    register_websocket_routes(sock)
+    return app

build/lib/chatmock/cli.py ADDED Viewed

	@@ -0,0 +1,425 @@

+from __future__ import annotations
+import errno
+import argparse
+import json
+import os
+import sys
+import webbrowser
+from datetime import datetime
+from .app import create_app
+from .config import CLIENT_ID_DEFAULT
+from .limits import RateLimitWindow, compute_reset_at, load_rate_limit_snapshot
+from .oauth import OAuthHTTPServer, OAuthHandler, REQUIRED_PORT, URL_BASE
+from .utils import eprint, get_home_dir, load_chatgpt_tokens, parse_jwt_claims, read_auth_file
+# Number of character cells drawn between the brackets of the usage bar.
+_STATUS_LIMIT_BAR_SEGMENTS = 30
+# Glyphs for a fully used cell, an unused cell, and the trailing partly used cell.
+_STATUS_LIMIT_BAR_FILLED = "█"
+_STATUS_LIMIT_BAR_EMPTY = "░"
+_STATUS_LIMIT_BAR_PARTIAL = "▓"
+def _clamp_percent(value: float) -> float:
+    try:
+        percent = float(value)
+    except Exception:
+        return 0.0
+    if percent != percent:
+        return 0.0
+    if percent < 0.0:
+        return 0.0
+    if percent > 100.0:
+        return 100.0
+    return percent
+def _render_progress_bar(percent_used: float) -> str:
+    ratio = max(0.0, min(1.0, percent_used / 100.0))
+    filled_exact = ratio * _STATUS_LIMIT_BAR_SEGMENTS
+    filled = int(filled_exact)
+    partial = filled_exact - filled
+    has_partial = partial > 0.5
+    if has_partial:
+        filled += 1
+    filled = max(0, min(_STATUS_LIMIT_BAR_SEGMENTS, filled))
+    empty = _STATUS_LIMIT_BAR_SEGMENTS - filled
+    if has_partial and filled > 0:
+        bar = _STATUS_LIMIT_BAR_FILLED * (filled - 1) + _STATUS_LIMIT_BAR_PARTIAL + _STATUS_LIMIT_BAR_EMPTY * empty
+    else:
+        bar = _STATUS_LIMIT_BAR_FILLED * filled + _STATUS_LIMIT_BAR_EMPTY * empty
+    return f"[{bar}]"
+def _get_usage_color(percent_used: float) -> str:
+    if percent_used >= 90:
+        return "\033[91m"
+    elif percent_used >= 75:
+        return "\033[93m"
+    elif percent_used >= 50:
+        return "\033[94m"
+    else:
+        return "\033[92m"
+def _reset_color() -> str:
+    """ANSI reset color code"""
+    return "\033[0m"
+def _format_window_duration(minutes: int | None) -> str | None:
+    if minutes is None:
+        return None
+    try:
+        total = int(minutes)
+    except Exception:
+        return None
+    if total <= 0:
+        return None
+    minutes = total
+    weeks, remainder = divmod(minutes, 7 * 24 * 60)
+    days, remainder = divmod(remainder, 24 * 60)
+    hours, remainder = divmod(remainder, 60)
+    parts = []
+    if weeks:
+        parts.append(f"{weeks} week" + ("s" if weeks != 1 else ""))
+    if days:
+        parts.append(f"{days} day" + ("s" if days != 1 else ""))
+    if hours:
+        parts.append(f"{hours} hour" + ("s" if hours != 1 else ""))
+    if remainder:
+        parts.append(f"{remainder} minute" + ("s" if remainder != 1 else ""))
+    if not parts:
+        parts.append(f"{minutes} minute" + ("s" if minutes != 1 else ""))
+    return " ".join(parts)
+def _format_reset_duration(seconds: int | None) -> str | None:
+    if seconds is None:
+        return None
+    try:
+        value = int(seconds)
+    except Exception:
+        return None
+    if value < 0:
+        value = 0
+    days, remainder = divmod(value, 86400)
+    hours, remainder = divmod(remainder, 3600)
+    minutes, remainder = divmod(remainder, 60)
+    parts: list[str] = []
+    if days:
+        parts.append(f"{days}d")
+    if hours:
+        parts.append(f"{hours}h")
+    if minutes:
+        parts.append(f"{minutes}m")
+    if not parts and remainder:
+        parts.append("under 1m")
+    if not parts:
+        parts.append("0m")
+    return " ".join(parts)
+def _format_local_datetime(dt: datetime) -> str:
+    local = dt.astimezone()
+    tz_name = local.tzname() or "local"
+    return f"{local.strftime('%b %d, %Y %H:%M')} {tz_name}"
+def _print_usage_limits_block() -> None:
+    stored = load_rate_limit_snapshot()
+    print("📊 Usage Limits")
+    if stored is None:
+        print("  No usage data available yet. Send a request through ChatMock first.")
+        print()
+        return
+    update_time = _format_local_datetime(stored.captured_at)
+    print(f"Last updated: {update_time}")
+    print()
+    windows: list[tuple[str, str, RateLimitWindow]] = []
+    if stored.snapshot.primary is not None:
+        windows.append(("⚡", "5 hour limit", stored.snapshot.primary))
+    if stored.snapshot.secondary is not None:
+        windows.append(("📅", "Weekly limit", stored.snapshot.secondary))
+    if not windows:
+        print("  Usage data was captured but no limit windows were provided.")
+        print()
+        return
+    for i, (icon_label, desc, window) in enumerate(windows):
+        if i > 0:
+            print()
+        percent_used = _clamp_percent(window.used_percent)
+        remaining = max(0.0, 100.0 - percent_used)
+        color = _get_usage_color(percent_used)
+        reset = _reset_color()
+        progress = _render_progress_bar(percent_used)
+        usage_text = f"{percent_used:5.1f}% used"
+        remaining_text = f"{remaining:5.1f}% left"
+        print(f"{icon_label} {desc}")
+        print(f"{color}{progress}{reset} {color}{usage_text}{reset} | {remaining_text}")
+        reset_in = _format_reset_duration(window.resets_in_seconds)
+        reset_at = compute_reset_at(stored.captured_at, window)
+        if reset_in and reset_at:
+            reset_at_str = _format_local_datetime(reset_at)
+            print(f"    ⏳ Resets in: {reset_in} at {reset_at_str}")
+        elif reset_in:
+            print(f"    ⏳ Resets in: {reset_in}")
+        elif reset_at:
+            reset_at_str = _format_local_datetime(reset_at)
+            print(f"    ⏳ Resets at: {reset_at_str}")
+    print()
+def cmd_login(no_browser: bool, verbose: bool) -> int:
+    """Run the interactive OAuth login flow and return a process exit code.
+
+    Starts a local ``OAuthHTTPServer`` on ``REQUIRED_PORT``, optionally opens
+    the authorization URL in a browser, and serves until the server is shut
+    down or interrupted. A background thread also accepts a pasted redirect
+    URL on stdin as an alternative to the HTTP callback.
+
+    Args:
+        no_browser: When true, do not try to open the browser automatically.
+        verbose: Passed through to the OAuth server.
+
+    Returns:
+        1 when no client id is configured or the server cannot be created,
+        13 when the port is already in use, otherwise ``httpd.exit_code``.
+    """
+    home_dir = get_home_dir()
+    client_id = CLIENT_ID_DEFAULT
+    if not client_id:
+        eprint("ERROR: No OAuth client id configured. Set CHATGPT_LOCAL_CLIENT_ID.")
+        return 1
+    try:
+        bind_host = os.getenv("CHATGPT_LOCAL_LOGIN_BIND", "127.0.0.1")
+        httpd = OAuthHTTPServer((bind_host, REQUIRED_PORT), OAuthHandler, home_dir=home_dir, client_id=client_id, verbose=verbose)
+    except OSError as e:
+        eprint(f"ERROR: {e}")
+        # A distinct exit code lets callers tell "port busy" from other failures.
+        if e.errno == errno.EADDRINUSE:
+            return 13
+        return 1
+    auth_url = httpd.auth_url()
+    with httpd:
+        eprint(f"Starting local login server on {URL_BASE}")
+        if not no_browser:
+            try:
+                webbrowser.open(auth_url, new=1, autoraise=True)
+            except Exception as e:
+                eprint(f"Failed to open browser: {e}")
+        eprint(f"If your browser did not open, navigate to:\n{auth_url}")
+        def _stdin_paste_worker() -> None:
+            # Fallback path: read one line from stdin and, if it is a redirect
+            # URL carrying an auth code, finish the login without the callback.
+            try:
+                eprint(
+                    "If the browser can't reach this machine, paste the full redirect URL here and press Enter (or leave blank to keep waiting):"
+                )
+                line = sys.stdin.readline().strip()
+                if not line:
+                    return
+                try:
+                    from urllib.parse import urlparse, parse_qs
+                    parsed = urlparse(line)
+                    params = parse_qs(parsed.query)
+                    code = (params.get("code") or [None])[0]
+                    state = (params.get("state") or [None])[0]
+                    if not code:
+                        eprint("Input did not contain an auth code. Ignoring.")
+                        return
+                    # NOTE(review): a pasted URL with no state parameter skips this check.
+                    if state and state != httpd.state:
+                        eprint("State mismatch. Ignoring pasted URL for safety.")
+                        return
+                    eprint("Received redirect URL. Completing login without callback…")
+                    bundle, _ = httpd.exchange_code(code)
+                    if httpd.persist_auth(bundle):
+                        httpd.exit_code = 0
+                        eprint("Login successful. Tokens saved.")
+                    else:
+                        eprint("ERROR: Unable to persist auth file.")
+                    # Stops serve_forever() in the main thread whether or not persisting worked.
+                    httpd.shutdown()
+                except Exception as exc:
+                    eprint(f"Failed to process pasted redirect URL: {exc}")
+            except Exception:
+                # Best effort only: stdin problems must not break the callback flow.
+                pass
+        try:
+            import threading
+            # Daemon thread so a blocked stdin read never keeps the process alive.
+            threading.Thread(target=_stdin_paste_worker, daemon=True).start()
+        except Exception:
+            pass
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            eprint("\nKeyboard interrupt received, exiting.")
+        return httpd.exit_code
+def cmd_serve(
+    host: str,
+    port: int,
+    verbose: bool,
+    verbose_obfuscation: bool,
+    reasoning_effort: str,
+    reasoning_summary: str,
+    reasoning_compat: str,
+    fast_mode: bool,
+    debug_model: str | None,
+    expose_reasoning_models: bool,
+    default_web_search: bool,
+) -> int:
+    app = create_app(
+        verbose=verbose,
+        verbose_obfuscation=verbose_obfuscation,
+        reasoning_effort=reasoning_effort,
+        reasoning_summary=reasoning_summary,
+        reasoning_compat=reasoning_compat,
+        fast_mode=fast_mode,
+        debug_model=debug_model,
+        expose_reasoning_models=expose_reasoning_models,
+        default_web_search=default_web_search,
+    )
+    app.run(host=host, use_reloader=False, port=port, threaded=True)
+    return 0
+def main() -> None:
+    """Command-line entry point: parse arguments and dispatch a subcommand.
+
+    Subcommands are ``login``, ``serve`` and ``info``. Every branch ends the
+    process via ``sys.exit`` (or ``parser.error``), so this does not return
+    normally.
+    """
+    parser = argparse.ArgumentParser(description="ChatMock: login & OpenAI-compatible proxy")
+    sub = parser.add_subparsers(dest="command", required=True)
+    p_login = sub.add_parser("login", help="Authorize with ChatGPT and store tokens")
+    p_login.add_argument("--no-browser", action="store_true", help="Do not open the browser automatically")
+    p_login.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+    p_serve = sub.add_parser("serve", help="Run local OpenAI-compatible server")
+    p_serve.add_argument("--host", default="127.0.0.1")
+    p_serve.add_argument("--port", type=int, default=8000)
+    p_serve.add_argument("--verbose", action="store_true", help="Enable verbose logging")
+    p_serve.add_argument(
+        "--verbose-obfuscation",
+        action="store_true",
+        help="Also dump raw SSE/obfuscation events (in addition to --verbose request/response logs).",
+    )
+    # Several serve options below take their default from a CHATGPT_LOCAL_* environment variable.
+    p_serve.add_argument(
+        "--debug-model",
+        dest="debug_model",
+        default=os.getenv("CHATGPT_LOCAL_DEBUG_MODEL"),
+        help="Forcibly override requested 'model' with this value",
+    )
+    p_serve.add_argument(
+        "--fast-mode",
+        action=argparse.BooleanOptionalAction,
+        default=(os.getenv("CHATGPT_LOCAL_FAST_MODE") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help="Enable GPT fast mode by default for supported models; request-level overrides still take precedence.",
+    )
+    p_serve.add_argument(
+        "--reasoning-effort",
+        choices=["none", "minimal", "low", "medium", "high", "xhigh"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_EFFORT", "medium").lower(),
+        help="Reasoning effort level for Responses API (default: medium)",
+    )
+    p_serve.add_argument(
+        "--reasoning-summary",
+        choices=["auto", "concise", "detailed", "none"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_SUMMARY", "auto").lower(),
+        help="Reasoning summary verbosity (default: auto)",
+    )
+    p_serve.add_argument(
+        "--reasoning-compat",
+        choices=["legacy", "o3", "think-tags", "current"],
+        default=os.getenv("CHATGPT_LOCAL_REASONING_COMPAT", "think-tags").lower(),
+        help=(
+            "Compatibility mode for exposing reasoning to clients (legacy|o3|think-tags). "
+            "'current' is accepted as an alias for 'legacy'"
+        ),
+    )
+    p_serve.add_argument(
+        "--expose-reasoning-models",
+        action="store_true",
+        default=(os.getenv("CHATGPT_LOCAL_EXPOSE_REASONING_MODELS") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Expose GPT-5 family reasoning effort variants (none|minimal|low|medium|high|xhigh where supported) "
+            "as separate models from /v1/models. This allows choosing effort via model selection in compatible UIs."
+        ),
+    )
+    p_serve.add_argument(
+        "--enable-web-search",
+        action=argparse.BooleanOptionalAction,
+        default=(os.getenv("CHATGPT_LOCAL_ENABLE_WEB_SEARCH") or "").strip().lower() in ("1", "true", "yes", "on"),
+        help=(
+            "Enable default web_search tool when a request omits responses_tools (off by default). "
+            "Also configurable via CHATGPT_LOCAL_ENABLE_WEB_SEARCH."
+        ),
+    )
+    p_info = sub.add_parser("info", help="Print current stored tokens and derived account id")
+    p_info.add_argument("--json", action="store_true", help="Output raw auth.json contents")
+    args = parser.parse_args()
+    if args.command == "login":
+        sys.exit(cmd_login(no_browser=args.no_browser, verbose=args.verbose))
+    elif args.command == "serve":
+        sys.exit(
+            cmd_serve(
+                host=args.host,
+                port=args.port,
+                verbose=args.verbose,
+                verbose_obfuscation=args.verbose_obfuscation,
+                reasoning_effort=args.reasoning_effort,
+                reasoning_summary=args.reasoning_summary,
+                reasoning_compat=args.reasoning_compat,
+                fast_mode=args.fast_mode,
+                debug_model=args.debug_model,
+                expose_reasoning_models=args.expose_reasoning_models,
+                default_web_search=args.enable_web_search,
+            )
+        )
+    elif args.command == "info":
+        auth = read_auth_file()
+        # --json dumps the stored auth data as-is and skips the summary below.
+        if getattr(args, "json", False):
+            print(json.dumps(auth or {}, indent=2))
+            sys.exit(0)
+        access_token, account_id, id_token = load_chatgpt_tokens()
+        if not access_token or not id_token:
+            print("👤 Account")
+            print("  • Not signed in")
+            print("  • Run: python3 chatmock.py login")
+            print("")
+            _print_usage_limits_block()
+            sys.exit(0)
+        id_claims = parse_jwt_claims(id_token) or {}
+        access_claims = parse_jwt_claims(access_token) or {}
+        email = id_claims.get("email") or id_claims.get("preferred_username") or "<unknown>"
+        plan_raw = (access_claims.get("https://api.openai.com/auth") or {}).get("chatgpt_plan_type") or "unknown"
+        plan_map = {
+            "plus": "Plus",
+            "pro": "Pro",
+            "free": "Free",
+            "team": "Team",
+            "enterprise": "Enterprise",
+        }
+        # Plan types not in the map are title-cased when they are strings.
+        plan = plan_map.get(str(plan_raw).lower(), str(plan_raw).title() if isinstance(plan_raw, str) else "Unknown")
+        print("👤 Account")
+        print("  • Signed in with ChatGPT")
+        print(f"  • Login: {email}")
+        print(f"  • Plan: {plan}")
+        if account_id:
+            print(f"  • Account ID: {account_id}")
+        print("")
+        _print_usage_limits_block()
+        sys.exit(0)
+    else:
+        parser.error("Unknown command")
+# Allow running the CLI module directly as a script.
+if __name__ == "__main__":
+    main()

build/lib/chatmock/config.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from __future__ import annotations
+import os
+import sys
+from pathlib import Path
+# OAuth client id; the environment variable wins, an unset or empty value falls back to the built-in id.
+CLIENT_ID_DEFAULT = os.getenv("CHATGPT_LOCAL_CLIENT_ID") or "app_EMoamEEZ73f0CkXaXp7hrann"
+# OAuth issuer base URL, overridable the same way; the token endpoint is derived from it.
+OAUTH_ISSUER_DEFAULT = os.getenv("CHATGPT_LOCAL_ISSUER") or "https://auth.openai.com"
+OAUTH_TOKEN_URL = f"{OAUTH_ISSUER_DEFAULT}/oauth/token"
+# Upstream responses endpoint URL.
+CHATGPT_RESPONSES_URL = "https://chatgpt.com/backend-api/codex/responses"
+def _read_prompt_text(filename: str) -> str | None:
+    candidates = [
+        Path(__file__).parent.parent / filename,
+        Path(__file__).parent / filename,
+        Path(getattr(sys, "_MEIPASS", "")) / filename if getattr(sys, "_MEIPASS", None) else None,
+        Path.cwd() / filename,
+    ]
+    for candidate in candidates:
+        if not candidate:
+            continue
+        try:
+            if candidate.exists():
+                content = candidate.read_text(encoding="utf-8")
+                if isinstance(content, str) and content.strip():
+                    return content
+        except Exception:
+            continue
+    return None
+def read_base_instructions() -> str:
+    content = _read_prompt_text("prompt.md")
+    if content is None:
+        raise FileNotFoundError("Failed to read prompt.md; expected adjacent to package or CWD.")
+    return content
+def read_gpt5_codex_instructions(fallback: str) -> str:
+    content = _read_prompt_text("prompt_gpt5_codex.md")
+    return content if isinstance(content, str) and content.strip() else fallback
+# Loaded once at import time; importing this module raises FileNotFoundError if prompt.md cannot be read.
+BASE_INSTRUCTIONS = read_base_instructions()
+# The codex prompt falls back to the base instructions when its own file is missing or blank.
+GPT5_CODEX_INSTRUCTIONS = read_gpt5_codex_instructions(BASE_INSTRUCTIONS)

build/lib/chatmock/fast_mode.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+from .model_registry import normalize_model_name
+# Normalized model ids for which the "priority" service tier (fast mode) is accepted.
+PRIORITY_SUPPORTED_MODELS = frozenset(
+    (
+        "gpt-5.4",
+        "gpt-5.2",
+        "gpt-5.1",
+        "gpt-5",
+        "gpt-5.1-codex",
+        "gpt-5-codex",
+    )
+)
+_TRUE_STRINGS = {"1", "true", "yes", "on"}
+_FALSE_STRINGS = {"0", "false", "no", "off"}
+def parse_optional_bool(value: Any) -> bool | None:
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in _TRUE_STRINGS:
+            return True
+        if normalized in _FALSE_STRINGS:
+            return False
+    return None
+def supports_priority_service_tier(model: str | None) -> bool:
+    return normalize_model_name(model) in PRIORITY_SUPPORTED_MODELS
+@dataclass(frozen=True)
+class ServiceTierResolution:
+    """Immutable result of resolving which service tier a request should use."""
+    # Tier to use, or None when no tier applies.
+    service_tier: str | None
+    # Set when an explicitly requested priority tier is unsupported for the model.
+    error_message: str | None = None
+    # Set when the server-default priority tier was dropped as unsupported.
+    warning_message: str | None = None
+    # True when the tier came from the server-wide fast-mode default.
+    used_server_default: bool = False
+def resolve_service_tier(
+    model: str | None,
+    *,
+    request_fast_mode: Any = None,
+    request_service_tier: Any = None,
+    server_fast_mode: bool = False,
+) -> ServiceTierResolution:
+    explicit_fast_mode = parse_optional_bool(request_fast_mode)
+    tier: str | None = None
+    explicit_request = False
+    used_server_default = False
+    if explicit_fast_mode is not None:
+        tier = "priority" if explicit_fast_mode else None
+        explicit_request = True
+    elif isinstance(request_service_tier, str) and request_service_tier.strip():
+        tier = request_service_tier.strip().lower()
+        explicit_request = True
+    elif server_fast_mode:
+        tier = "priority"
+        used_server_default = True
+    if tier == "priority" and not supports_priority_service_tier(model):
+        normalized = normalize_model_name(model)
+        message = (
+            f"Fast mode is not supported for model '{normalized}'. "
+            "Use a supported GPT-5 priority-processing model or disable fast mode for this request."
+        )
+        if explicit_request:
+            return ServiceTierResolution(
+                service_tier=None,
+                error_message=message,
+                used_server_default=used_server_default,
+            )
+        return ServiceTierResolution(
+            service_tier=None,
+            warning_message=message,
+            used_server_default=used_server_default,
+        )
+    return ServiceTierResolution(
+        service_tier=tier,
+        used_server_default=used_server_default,
+    )

build/lib/chatmock/http.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from __future__ import annotations
+from flask import Response, jsonify, request
+def build_cors_headers() -> dict:
+    origin = request.headers.get("Origin", "*")
+    req_headers = request.headers.get("Access-Control-Request-Headers")
+    allow_headers = req_headers if req_headers else "Authorization, Content-Type, Accept"
+    return {
+        "Access-Control-Allow-Origin": origin,
+        "Access-Control-Allow-Methods": "POST, GET, OPTIONS",
+        "Access-Control-Allow-Headers": allow_headers,
+        "Access-Control-Max-Age": "86400",
+    }
+def json_error(message: str, status: int = 400) -> Response:
+    resp = jsonify({"error": {"message": message}})
+    response: Response = Response(response=resp.response, status=status, mimetype="application/json")
+    for k, v in build_cors_headers().items():
+        response.headers.setdefault(k, v)
+    return response

build/lib/chatmock/limits.py ADDED Viewed

	@@ -0,0 +1,200 @@

+from __future__ import annotations
+import json
+import os
+from dataclasses import dataclass
+from datetime import datetime, timedelta, timezone
+from typing import Any, Mapping, Optional
+from .utils import get_home_dir
+# Response header names carrying the primary and secondary rate-limit windows.
+_PRIMARY_USED = "x-codex-primary-used-percent"
+_PRIMARY_WINDOW = "x-codex-primary-window-minutes"
+_PRIMARY_RESET = "x-codex-primary-reset-after-seconds"
+_SECONDARY_USED = "x-codex-secondary-used-percent"
+_SECONDARY_WINDOW = "x-codex-secondary-window-minutes"
+_SECONDARY_RESET = "x-codex-secondary-reset-after-seconds"
+# File name, inside the home directory, where the latest snapshot is persisted.
+_LIMITS_FILENAME = "usage_limits.json"
+@dataclass
+class RateLimitWindow:
+    """One rate-limit window as reported by the rate-limit headers."""
+    # Percentage of the window already used.
+    used_percent: float
+    # Window length in minutes, when reported.
+    window_minutes: Optional[int]
+    # Seconds until the window resets, counted from capture time, when reported.
+    resets_in_seconds: Optional[int]
+@dataclass
+class RateLimitSnapshot:
+    """The primary and secondary windows from one response; either may be absent."""
+    primary: Optional[RateLimitWindow]
+    secondary: Optional[RateLimitWindow]
+@dataclass
+class StoredRateLimitSnapshot:
+    """A snapshot together with the time it was captured."""
+    # Timezone-aware capture time when loaded from disk.
+    captured_at: datetime
+    snapshot: RateLimitSnapshot
+def _parse_float(value: Any) -> Optional[float]:
+    try:
+        if value is None:
+            return None
+        if isinstance(value, (int, float)):
+            return float(value)
+        value_str = str(value).strip()
+        if not value_str:
+            return None
+        parsed = float(value_str)
+        if not (parsed == parsed and parsed not in (float("inf"), float("-inf"))):
+            return None
+        return parsed
+    except Exception:
+        return None
+def _parse_int(value: Any) -> Optional[int]:
+    try:
+        if value is None:
+            return None
+        if isinstance(value, bool):
+            return None
+        if isinstance(value, int):
+            return value
+        value_str = str(value).strip()
+        if not value_str:
+            return None
+        return int(value_str)
+    except Exception:
+        return None
+def _parse_window(headers: Mapping[str, Any], used_key: str, window_key: str, reset_key: str) -> Optional[RateLimitWindow]:
+    used_percent = _parse_float(headers.get(used_key))
+    if used_percent is None:
+        return None
+    window_minutes = _parse_int(headers.get(window_key))
+    resets_in_seconds = _parse_int(headers.get(reset_key))
+    return RateLimitWindow(used_percent=used_percent, window_minutes=window_minutes, resets_in_seconds=resets_in_seconds)
+def parse_rate_limit_headers(headers: Mapping[str, Any]) -> Optional[RateLimitSnapshot]:
+    try:
+        primary = _parse_window(headers, _PRIMARY_USED, _PRIMARY_WINDOW, _PRIMARY_RESET)
+        secondary = _parse_window(headers, _SECONDARY_USED, _SECONDARY_WINDOW, _SECONDARY_RESET)
+        if primary is None and secondary is None:
+            return None
+        return RateLimitSnapshot(primary=primary, secondary=secondary)
+    except Exception:
+        return None
+def _limits_path() -> str:
+    home = get_home_dir()
+    return os.path.join(home, _LIMITS_FILENAME)
+def store_rate_limit_snapshot(snapshot: RateLimitSnapshot, captured_at: Optional[datetime] = None) -> None:
+    captured = captured_at or datetime.now(timezone.utc)
+    try:
+        home = get_home_dir()
+        os.makedirs(home, exist_ok=True)
+        payload: dict[str, Any] = {
+            "captured_at": captured.isoformat(),
+        }
+        if snapshot.primary:
+            payload["primary"] = {
+                "used_percent": snapshot.primary.used_percent,
+                "window_minutes": snapshot.primary.window_minutes,
+                "resets_in_seconds": snapshot.primary.resets_in_seconds,
+            }
+        if snapshot.secondary:
+            payload["secondary"] = {
+                "used_percent": snapshot.secondary.used_percent,
+                "window_minutes": snapshot.secondary.window_minutes,
+                "resets_in_seconds": snapshot.secondary.resets_in_seconds,
+            }
+        with open(_limits_path(), "w", encoding="utf-8") as fp:
+            if hasattr(os, "fchmod"):
+                try:
+                    os.fchmod(fp.fileno(), 0o600)
+                except OSError:
+                    pass
+            json.dump(payload, fp, indent=2)
+    except Exception:
+        # Silently ignore persistence errors.
+        pass
+def load_rate_limit_snapshot() -> Optional[StoredRateLimitSnapshot]:
+    try:
+        with open(_limits_path(), "r", encoding="utf-8") as fp:
+            raw = json.load(fp)
+    except FileNotFoundError:
+        return None
+    except Exception:
+        return None
+    captured_raw = raw.get("captured_at")
+    captured_at = _parse_datetime(captured_raw)
+    if captured_at is None:
+        return None
+    snapshot = RateLimitSnapshot(
+        primary=_dict_to_window(raw.get("primary")),
+        secondary=_dict_to_window(raw.get("secondary")),
+    )
+    if snapshot.primary is None and snapshot.secondary is None:
+        return None
+    return StoredRateLimitSnapshot(captured_at=captured_at, snapshot=snapshot)
+def _parse_datetime(value: Any) -> Optional[datetime]:
+    if not isinstance(value, str):
+        return None
+    text = value.strip()
+    if not text:
+        return None
+    if text.endswith("Z"):
+        text = text[:-1] + "+00:00"
+    try:
+        dt = datetime.fromisoformat(text)
+        if dt.tzinfo is None:
+            return dt.replace(tzinfo=timezone.utc)
+        return dt
+    except ValueError:
+        return None
+def _dict_to_window(value: Any) -> Optional[RateLimitWindow]:
+    if not isinstance(value, dict):
+        return None
+    used = _parse_float(value.get("used_percent"))
+    if used is None:
+        return None
+    window = _parse_int(value.get("window_minutes"))
+    resets = _parse_int(value.get("resets_in_seconds"))
+    return RateLimitWindow(used_percent=used, window_minutes=window, resets_in_seconds=resets)
+def record_rate_limits_from_response(response: Any) -> None:
+    if response is None:
+        return
+    headers = getattr(response, "headers", None)
+    if headers is None:
+        return
+    snapshot = parse_rate_limit_headers(headers)
+    if snapshot is None:
+        return
+    store_rate_limit_snapshot(snapshot)
+def compute_reset_at(captured_at: datetime, window: RateLimitWindow) -> Optional[datetime]:
+    if window.resets_in_seconds is None:
+        return None
+    try:
+        return captured_at + timedelta(seconds=int(window.resets_in_seconds))
+    except Exception:
+        return None

build/lib/chatmock/model_registry.py ADDED Viewed

	@@ -0,0 +1,198 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Iterable
+# Every recognized reasoning-effort name; this tuple is the order used when stripping effort suffixes.
+ALL_REASONING_EFFORTS = ("none", "minimal", "low", "medium", "high", "xhigh")
+# Effort set used for models without a narrower restriction.
+DEFAULT_REASONING_EFFORTS = frozenset(ALL_REASONING_EFFORTS)
+@dataclass(frozen=True)
+class ModelSpec:
+    """Immutable description of one supported model."""
+    # Name registered as a lookup key alongside the aliases.
+    public_id: str
+    # Identifier that lookups resolve to.
+    upstream_id: str
+    # Alternative names that resolve to this model.
+    aliases: tuple[str, ...]
+    # Reasoning efforts this model accepts.
+    allowed_efforts: frozenset[str]
+    # Efforts listed as per-model variants; may be empty.
+    variant_efforts: tuple[str, ...]
+    # Whether this model uses the codex instruction set.
+    uses_codex_instructions: bool = False
+# Registry of supported models; the alias and upstream lookup tables are built from this tuple.
+_MODEL_SPECS = (
+    ModelSpec(
+        public_id="gpt-5",
+        upstream_id="gpt-5",
+        aliases=("gpt5", "gpt-5-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low", "minimal"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.1",
+        upstream_id="gpt-5.1",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.2",
+        upstream_id="gpt-5.2",
+        aliases=("gpt5.2", "gpt-5.2-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.4",
+        upstream_id="gpt-5.4",
+        aliases=("gpt5.4", "gpt-5.4-latest"),
+        allowed_efforts=frozenset(("none", "low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low", "none"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.4-mini",
+        upstream_id="gpt-5.4-mini",
+        aliases=("gpt5.4-mini", "gpt-5.4-mini-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+    ),
+    ModelSpec(
+        public_id="gpt-5.3-codex",
+        upstream_id="gpt-5.3-codex",
+        aliases=("gpt5.3-codex", "gpt-5.3-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.3-codex-spark",
+        upstream_id="gpt-5.3-codex-spark",
+        aliases=("gpt5.3-codex-spark", "gpt-5.3-codex-spark-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5-codex",
+        upstream_id="gpt-5-codex",
+        aliases=("gpt5-codex", "gpt-5-codex-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.2-codex",
+        upstream_id="gpt-5.2-codex",
+        aliases=("gpt5.2-codex", "gpt-5.2-codex-latest"),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex",
+        upstream_id="gpt-5.1-codex",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=("high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-max",
+        upstream_id="gpt-5.1-codex-max",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high", "xhigh")),
+        variant_efforts=("xhigh", "high", "medium", "low"),
+        uses_codex_instructions=True,
+    ),
+    ModelSpec(
+        public_id="gpt-5.1-codex-mini",
+        upstream_id="gpt-5.1-codex-mini",
+        aliases=(),
+        allowed_efforts=frozenset(("low", "medium", "high")),
+        variant_efforts=(),
+        uses_codex_instructions=True,
+    ),
+    # The only entry whose public id differs from its upstream id.
+    ModelSpec(
+        public_id="codex-mini",
+        upstream_id="codex-mini-latest",
+        aliases=("codex", "codex-mini-latest"),
+        allowed_efforts=DEFAULT_REASONING_EFFORTS,
+        variant_efforts=(),
+        uses_codex_instructions=True,
+    ),
+)
+# Upstream model id -> the spec that declares it.
+_SPECS_BY_UPSTREAM = dict((spec.upstream_id, spec) for spec in _MODEL_SPECS)
+# Every accepted spelling (public id first, then each alias) -> upstream model id.
+_ALIASES = {}
+for _spec in _MODEL_SPECS:
+    for _alias in (_spec.public_id, *_spec.aliases):
+        _ALIASES[_alias] = _spec.upstream_id
+def _strip_model_name(model: str | None) -> tuple[str, str | None]:
+    """Split a requested model name into ``(base_name, effort)``.
+    The name is stripped and lower-cased. A trailing reasoning effort may be
+    written as ``name:effort``, ``name-effort`` or ``name_effort``; when one is
+    recognised it is removed from the base name and returned separately.
+    Returns ``("", None)`` for non-string or blank input, and ``(name, None)``
+    when no effort suffix is present.
+    """
+    if not isinstance(model, str):
+        return "", None
+    value = model.strip().lower()
+    if not value:
+        return "", None
+    if ":" in value:
+        base, maybe_effort = value.rsplit(":", 1)
+        # Accept the same effort vocabulary as the "-"/"_" forms below. Checking
+        # only the default set here meant ":effort" rejected any effort that the
+        # suffix forms accept but the default set lacks.
+        if maybe_effort in ALL_REASONING_EFFORTS or maybe_effort in DEFAULT_REASONING_EFFORTS:
+            return base, maybe_effort
+    for separator in ("-", "_"):
+        for effort in ALL_REASONING_EFFORTS:
+            suffix = f"{separator}{effort}"
+            if value.endswith(suffix):
+                return value[: -len(suffix)], effort
+    return value, None
+def model_spec_for_name(model: str | None) -> ModelSpec | None:
+    """Look up the registry entry for ``model``, ignoring any effort suffix.
+    Returns ``None`` when the base name is neither a public id nor an alias.
+    """
+    base_name = _strip_model_name(model)[0]
+    target = _ALIASES.get(base_name)
+    return _SPECS_BY_UPSTREAM.get(target) if target else None
+def normalize_model_name(model: str | None, debug_model: str | None = None) -> str:
+    """Resolve a requested model name to the id that should be used.
+    A non-blank ``debug_model`` wins outright (returned stripped). Otherwise a
+    registered model maps to its ``upstream_id``; an unregistered name is
+    returned with its effort suffix removed, and a blank name becomes
+    ``"gpt-5.4"``.
+    """
+    if isinstance(debug_model, str):
+        forced = debug_model.strip()
+        if forced:
+            return forced
+    known = model_spec_for_name(model)
+    if known is None:
+        return _strip_model_name(model)[0] or "gpt-5.4"
+    return known.upstream_id
+def uses_codex_instructions(model: str | None) -> bool:
+    """Tell whether ``model`` should receive the Codex instruction set."""
+    known = model_spec_for_name(model)
+    if known is None:
+        # Unregistered model: fall back to a substring check on the raw name.
+        return "codex" in ((model or "").strip().lower())
+    return known.uses_codex_instructions
+def allowed_efforts_for_model(model: str | None) -> frozenset[str]:
+    """Return the efforts a registered model accepts, else the default set."""
+    known = model_spec_for_name(model)
+    return DEFAULT_REASONING_EFFORTS if known is None else known.allowed_efforts
+def extract_reasoning_from_model_name(model: str | None) -> dict[str, str] | None:
+    """Return ``{"effort": ...}`` for an effort suffix in ``model``, else ``None``."""
+    effort = _strip_model_name(model)[1]
+    return {"effort": effort} if effort else None
+def list_public_models(expose_reasoning_models: bool = False) -> list[str]:
+    """List public model ids in registry order.
+    With ``expose_reasoning_models`` set, each id is followed by its
+    ``<id>-<effort>`` variants, in the order given by ``variant_efforts``.
+    """
+    names: list[str] = []
+    for spec in _MODEL_SPECS:
+        names.append(spec.public_id)
+        if not expose_reasoning_models:
+            continue
+        for effort in spec.variant_efforts:
+            names.append(f"{spec.public_id}-{effort}")
+    return names
+def iter_public_models() -> Iterable[ModelSpec]:
+    """Return the module-level ``_MODEL_SPECS`` collection itself (not a copy)."""
+    registry = _MODEL_SPECS
+    return registry

build/lib/chatmock/models.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Optional
+@dataclass
+class TokenData:
+    """The three OAuth tokens for one login plus the account id tied to them."""
+    # Token strings as issued by the identity provider.
+    id_token: str
+    access_token: str
+    refresh_token: str
+    # NOTE(review): presumably the ChatGPT account id taken from the id-token claims — confirm against the code that builds this object.
+    account_id: str
+@dataclass
+class AuthBundle:
+    """Everything produced by one login: optional API key, tokens and a refresh stamp."""
+    # None when no API key was obtained for the login.
+    api_key: Optional[str]
+    token_data: TokenData
+    # Timestamp string; NOTE(review): presumably ISO-8601 UTC — confirm against the producer.
+    last_refresh: str
+@dataclass
+class PkceCodes:
+    """A PKCE verifier/challenge pair for one authorization attempt."""
+    # Secret half, sent only when exchanging the authorization code.
+    code_verifier: str
+    # Public half, sent in the authorization URL.
+    code_challenge: str

build/lib/chatmock/oauth.py ADDED Viewed

	@@ -0,0 +1,340 @@

+from __future__ import annotations
+import datetime
+import ssl
+import http.server
+import json
+import secrets
+import threading
+import time
+import urllib.parse
+import urllib.request
+from typing import Any, Dict, Tuple
+import certifi
+from .config import OAUTH_ISSUER_DEFAULT
+from .models import AuthBundle, PkceCodes, TokenData
+from .utils import eprint, generate_pkce, parse_jwt_claims, write_auth_file
+# Port used to build URL_BASE. NOTE(review): presumably also the port the login server binds to — confirm against the caller.
+REQUIRED_PORT = 1455
+# Base URL of the local server; the post-login success URLs are built from it.
+URL_BASE = f"http://localhost:{REQUIRED_PORT}"
+# Issuer whose /oauth/authorize and /oauth/token endpoints the login server targets.
+DEFAULT_ISSUER = OAUTH_ISSUER_DEFAULT
+# Static page returned by the request handler once login has succeeded.
+LOGIN_SUCCESS_HTML = """<!DOCTYPE html>
+<html lang=\"en\">
+  <head>
+    <meta charset=\"utf-8\" />
+    <title>Login successful</title>
+  </head>
+  <body>
+    <div style=\"max-width: 640px; margin: 80px auto; font-family: system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, sans-serif;\">
+      <h1>Login successful</h1>
+      <p>You can now close this window and return to the terminal and run <code>python3 chatmock.py serve</code> to start the server.</p>
+    </div>
+  </body>
+  </html>
+"""
+# TLS context backed by certifi's CA bundle; passed to the token-endpoint urlopen calls.
+_SSL_CONTEXT = ssl.create_default_context(cafile=certifi.where())
+class OAuthHTTPServer(http.server.HTTPServer):
+    """Local HTTP server backing the browser-based OAuth login.
+    Holds the per-login values (PKCE pair, ``state``, redirect URI, client id)
+    and performs the token-endpoint calls. ``exit_code`` starts at 1 and is set
+    to 0 by the request handler once credentials have been persisted.
+    """
+    # Seconds to wait on each token-endpoint request, so a stalled connection
+    # cannot block the login callback indefinitely.
+    TOKEN_REQUEST_TIMEOUT = 30.0
+    def __init__(
+        self,
+        server_address: tuple[str, int],
+        request_handler_class: type[http.server.BaseHTTPRequestHandler],
+        *,
+        home_dir: str,
+        client_id: str,
+        verbose: bool = False,
+    ) -> None:
+        """Bind the server and generate fresh PKCE codes and ``state`` for this login."""
+        super().__init__(server_address, request_handler_class, bind_and_activate=True)
+        self.exit_code = 1
+        self.home_dir = home_dir
+        self.verbose = verbose
+        self.issuer = DEFAULT_ISSUER
+        self.token_endpoint = f"{self.issuer}/oauth/token"
+        self.client_id = client_id
+        port = server_address[1]
+        self.redirect_uri = f"http://localhost:{port}/auth/callback"
+        self.pkce = generate_pkce()
+        self.state = secrets.token_hex(32)
+    def auth_url(self) -> str:
+        """Return the authorization URL the user must open in a browser."""
+        params = {
+            "response_type": "code",
+            "client_id": self.client_id,
+            "redirect_uri": self.redirect_uri,
+            "scope": "openid profile email offline_access",
+            "code_challenge": self.pkce.code_challenge,
+            "code_challenge_method": "S256",
+            "id_token_add_organizations": "true",
+            "codex_cli_simplified_flow": "true",
+            "state": self.state,
+        }
+        return f"{self.issuer}/oauth/authorize?" + urllib.parse.urlencode(params)
+    def _post_form(self, fields: Dict[str, Any]) -> Dict[str, Any]:
+        """POST ``fields`` form-encoded to the token endpoint and return the JSON object.
+        Raises ``ValueError`` when the response body is not a JSON object, in
+        addition to whatever ``urlopen`` / ``json.loads`` raise.
+        """
+        request = urllib.request.Request(
+            self.token_endpoint,
+            data=urllib.parse.urlencode(fields).encode(),
+            method="POST",
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+        )
+        with urllib.request.urlopen(
+            request,
+            context=_SSL_CONTEXT,
+            timeout=self.TOKEN_REQUEST_TIMEOUT,
+        ) as resp:
+            payload = json.loads(resp.read().decode())
+        if not isinstance(payload, dict):
+            raise ValueError("Token endpoint returned a non-object JSON payload")
+        return payload
+    def _success_url(self, params: Dict[str, Any]) -> str:
+        """Build the local ``/success`` URL carrying ``params`` as its query string."""
+        return f"{URL_BASE}/success?{urllib.parse.urlencode(params)}"
+    def exchange_code(self, code: str) -> tuple[AuthBundle, str]:
+        """Trade an authorization ``code`` for tokens.
+        Returns the resulting ``AuthBundle`` and the URL of the success page.
+        Network, HTTP and JSON errors propagate to the caller.
+        """
+        payload = self._post_form(
+            {
+                "grant_type": "authorization_code",
+                "code": code,
+                "redirect_uri": self.redirect_uri,
+                "client_id": self.client_id,
+                "code_verifier": self.pkce.code_verifier,
+            }
+        )
+        id_token = payload.get("id_token", "")
+        access_token = payload.get("access_token", "")
+        refresh_token = payload.get("refresh_token", "")
+        id_token_claims = parse_jwt_claims(id_token) or {}
+        access_token_claims = parse_jwt_claims(access_token) or {}
+        auth_claims = id_token_claims.get("https://api.openai.com/auth")
+        # The claim may be absent or null; treat anything but a mapping as empty
+        # instead of failing with AttributeError on ``.get``.
+        if not isinstance(auth_claims, dict):
+            auth_claims = {}
+        token_data = TokenData(
+            id_token=id_token,
+            access_token=access_token,
+            refresh_token=refresh_token,
+            account_id=auth_claims.get("chatgpt_account_id", ""),
+        )
+        api_key, success_url = self.maybe_obtain_api_key(
+            id_token_claims, access_token_claims, token_data
+        )
+        last_refresh_str = (
+            datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
+        )
+        bundle = AuthBundle(api_key=api_key, token_data=token_data, last_refresh=last_refresh_str)
+        return bundle, success_url or f"{URL_BASE}/success"
+    def maybe_obtain_api_key(
+        self,
+        token_claims: Dict[str, Any],
+        access_claims: Dict[str, Any],
+        token_data: TokenData,
+    ) -> tuple[str | None, str | None]:
+        """Exchange the id token for an API key when the claims name an org and project.
+        Returns ``(api_key, success_url)``; ``api_key`` is ``None`` when either
+        the organization id or the project id is missing from ``token_claims``.
+        """
+        org_id = token_claims.get("organization_id")
+        project_id = token_claims.get("project_id")
+        # The access token, refresh token and minted API key are deliberately
+        # kept out of the URL: query strings end up in browser history and logs.
+        # This matches the query built by OAuthHandler._maybe_obtain_api_key.
+        success_url = self._success_url(
+            {
+                "id_token": token_data.id_token,
+                "needs_setup": "false",
+                "org_id": org_id or "",
+                "project_id": project_id or "",
+                # ``or ""`` keeps a missing plan from rendering as the text "None".
+                "plan_type": access_claims.get("chatgpt_plan_type") or "",
+                "platform_url": "https://platform.openai.com",
+            }
+        )
+        if not org_id or not project_id:
+            return None, success_url
+        today = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
+        exchange_payload = self._post_form(
+            {
+                "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
+                "client_id": self.client_id,
+                "requested_token": "openai-api-key",
+                "subject_token": token_data.id_token,
+                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
+                "name": f"ChatMock [auto-generated] ({today})",
+            }
+        )
+        return exchange_payload.get("access_token"), success_url
+    def persist_auth(self, bundle: AuthBundle) -> bool:
+        """Write ``bundle`` through ``write_auth_file`` and return its result."""
+        auth_json_contents = {
+            "OPENAI_API_KEY": bundle.api_key,
+            "tokens": {
+                "id_token": bundle.token_data.id_token,
+                "access_token": bundle.token_data.access_token,
+                "refresh_token": bundle.token_data.refresh_token,
+                "account_id": bundle.token_data.account_id,
+            },
+            "last_refresh": bundle.last_refresh,
+        }
+        return write_auth_file(auth_json_contents)
+class OAuthHandler(http.server.BaseHTTPRequestHandler):
+    """Request handler for the local OAuth login server.
+    ``GET /auth/callback`` finishes the login (code exchange + persistence),
+    ``GET /success`` serves the confirmation page, and everything else is a
+    404. Every request path ends by shutting the server down.
+    """
+    server: "OAuthHTTPServer"
+    def do_GET(self) -> None:
+        """Handle the success page and the OAuth redirect callback."""
+        parsed = urllib.parse.urlparse(self.path)
+        path = parsed.path
+        if path == "/success":
+            self._send_html(LOGIN_SUCCESS_HTML)
+            try:
+                self.wfile.flush()
+            except Exception as e:
+                eprint(f"Failed to flush response: {e}")
+            self._shutdown_after_delay(2.0)
+            return
+        if path != "/auth/callback":
+            self.send_error(404, "Not Found")
+            self._shutdown()
+            return
+        params = urllib.parse.parse_qs(parsed.query)
+        code = params.get("code", [None])[0]
+        if not code:
+            self.send_error(400, "Missing auth code")
+            self._shutdown()
+            return
+        # The server sends a random ``state`` in the authorization URL; a
+        # callback that does not echo it back did not come from the login this
+        # process started, so its code must not be exchanged (login CSRF).
+        returned_state = params.get("state", [""])[0] or ""
+        expected_state = getattr(self.server, "state", "") or ""
+        if not expected_state or not secrets.compare_digest(
+            returned_state.encode("utf-8"), expected_state.encode("utf-8")
+        ):
+            self.send_error(400, "State mismatch")
+            self._shutdown()
+            return
+        try:
+            auth_bundle, success_url = self._exchange_code(code)
+        except Exception as exc:
+            # Keep the exception text out of the status line: it is encoded as
+            # latin-1 there, whereas ``explain`` only appears in the body.
+            self.send_error(500, "Token exchange failed", explain=str(exc))
+            self._shutdown()
+            return
+        if self.server.persist_auth(auth_bundle):
+            self.server.exit_code = 0
+            self._send_html(LOGIN_SUCCESS_HTML)
+        else:
+            self.send_error(500, "Unable to persist auth file")
+        self._shutdown_after_delay(2.0)
+    def do_POST(self) -> None:
+        """Reject POST requests and stop the server."""
+        self.send_error(404, "Not Found")
+        self._shutdown()
+    def log_message(self, fmt: str, *args):
+        """Emit the default access log only when the server is verbose."""
+        if getattr(self.server, "verbose", False):
+            super().log_message(fmt, *args)
+    def _send_redirect(self, url: str) -> None:
+        """Send a 302 redirect to ``url``."""
+        self.send_response(302)
+        self.send_header("Location", url)
+        self.end_headers()
+    def _send_html(self, body: str) -> None:
+        """Send ``body`` as a 200 HTML response with an explicit Content-Length."""
+        encoded = body.encode()
+        self.send_response(200)
+        self.send_header("Content-Type", "text/html; charset=utf-8")
+        self.send_header("Content-Length", str(len(encoded)))
+        self.end_headers()
+        self.wfile.write(encoded)
+    def _shutdown(self) -> None:
+        """Stop the server from a helper thread rather than the calling thread."""
+        threading.Thread(target=self.server.shutdown, daemon=True).start()
+    def _shutdown_after_delay(self, seconds: float = 2.0) -> None:
+        """Stop the server after ``seconds``, giving the response time to be delivered."""
+        def _later():
+            try:
+                time.sleep(seconds)
+            finally:
+                self._shutdown()
+        threading.Thread(target=_later, daemon=True).start()
+    def _exchange_code(self, code: str) -> Tuple[AuthBundle, str]:
+        """Delegate the authorization-code exchange to the server object."""
+        return self.server.exchange_code(code)
+    def _maybe_obtain_api_key(
+        self,
+        token_claims: Dict[str, Any],
+        access_claims: Dict[str, Any],
+        token_data: TokenData,
+    ) -> Tuple[str | None, str | None]:
+        """Exchange the id token for an API key when the claims name an org and project.
+        Returns ``(api_key, success_url)``; ``api_key`` is ``None`` when either
+        the organization id or the project id is missing from ``token_claims``.
+        """
+        org_id = token_claims.get("organization_id")
+        project_id = token_claims.get("project_id")
+        if not org_id or not project_id:
+            query = {
+                "id_token": token_data.id_token,
+                "needs_setup": "false",
+                "org_id": org_id or "",
+                "project_id": project_id or "",
+                "plan_type": access_claims.get("chatgpt_plan_type"),
+                "platform_url": "https://platform.openai.com",
+            }
+            return None, f"{URL_BASE}/success?{urllib.parse.urlencode(query)}"
+        today = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
+        exchange_data = urllib.parse.urlencode(
+            {
+                "grant_type": "urn:ietf:params:oauth:grant-type:token-exchange",
+                "client_id": self.server.client_id,
+                "requested_token": "openai-api-key",
+                "subject_token": token_data.id_token,
+                "subject_token_type": "urn:ietf:params:oauth:token-type:id_token",
+                "name": f"ChatMock [auto-generated] ({today})",
+            }
+        ).encode()
+        with urllib.request.urlopen(
+            urllib.request.Request(
+                self.server.token_endpoint,
+                data=exchange_data,
+                method="POST",
+                headers={"Content-Type": "application/x-www-form-urlencoded"},
+            ),
+            context=_SSL_CONTEXT,
+            # Bound the wait so a stalled connection cannot hang the handler.
+            timeout=30.0,
+        ) as resp:
+            exchange_payload = json.loads(resp.read().decode())
+            exchanged_access_token = exchange_payload.get("access_token")
+        chatgpt_plan_type = access_claims.get("chatgpt_plan_type")
+        success_url_query = {
+            "id_token": token_data.id_token,
+            "needs_setup": "false",
+            "org_id": org_id,
+            "project_id": project_id,
+            "plan_type": chatgpt_plan_type,
+            "platform_url": "https://platform.openai.com",
+        }
+        success_url = f"{URL_BASE}/success?{urllib.parse.urlencode(success_url_query)}"
+        return exchanged_access_token, success_url

build/lib/chatmock/prompt.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../prompt.md

build/lib/chatmock/prompt_gpt5_codex.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ ../prompt_gpt5_codex.md

build/lib/chatmock/reasoning.py ADDED Viewed

	@@ -0,0 +1,79 @@

+from __future__ import annotations
+from typing import Any, Dict
+from .model_registry import DEFAULT_REASONING_EFFORTS, allowed_efforts_for_model, extract_reasoning_from_model_name
+def build_reasoning_param(
+    base_effort: str = "medium",
+    base_summary: str = "auto",
+    overrides: Dict[str, Any] | None = None,
+    *,
+    allowed_efforts: frozenset[str] | None = None,
+) -> Dict[str, Any]:
+    """Build the ``reasoning`` request object from defaults plus optional overrides.
+    Args:
+        base_effort: Effort used when no valid override is given.
+        base_summary: Summary mode used when no valid override is given.
+        overrides: Optional mapping whose ``effort`` / ``summary`` string
+            values replace the base values when they are valid.
+        allowed_efforts: Efforts accepted for the target model; falls back to
+            ``DEFAULT_REASONING_EFFORTS`` when empty or ``None``.
+    Returns:
+        ``{"effort": ...}`` plus ``"summary"`` unless the summary mode is
+        ``"none"``. An invalid effort becomes ``"medium"`` and an invalid
+        summary becomes ``"auto"``.
+    """
+    effort = (base_effort or "").strip().lower()
+    summary = (base_summary or "").strip().lower()
+    valid_efforts = allowed_efforts or DEFAULT_REASONING_EFFORTS
+    valid_summaries = {"auto", "concise", "detailed", "none"}
+    if isinstance(overrides, dict):
+        raw_effort = overrides.get("effort")
+        raw_summary = overrides.get("summary")
+        # Only real strings count as overrides. Stringifying blindly turned a
+        # JSON null into the literal "none", which is a valid summary mode (and
+        # may be a valid effort), silently changing the request.
+        o_eff = raw_effort.strip().lower() if isinstance(raw_effort, str) else ""
+        o_sum = raw_summary.strip().lower() if isinstance(raw_summary, str) else ""
+        if o_eff and o_eff in valid_efforts:
+            effort = o_eff
+        if o_sum and o_sum in valid_summaries:
+            summary = o_sum
+    if effort not in valid_efforts:
+        effort = "medium"
+    if summary not in valid_summaries:
+        summary = "auto"
+    reasoning: Dict[str, Any] = {"effort": effort}
+    if summary != "none":
+        reasoning["summary"] = summary
+    return reasoning
+def apply_reasoning_to_message(
+    message: Dict[str, Any],
+    reasoning_summary_text: str,
+    reasoning_full_text: str,
+    compat: str,
+) -> Dict[str, Any]:
+    """Attach reasoning text to ``message`` in the shape selected by ``compat``.
+    ``"o3"`` stores the combined text under ``message["reasoning"]["content"]``;
+    ``"legacy"`` / ``"current"`` store the two texts under separate keys; any
+    other value (including an unusable one) prepends a ``<think>`` block to
+    string content. ``message`` is mutated and returned.
+    """
+    try:
+        mode = (compat or "think-tags").strip().lower()
+    except Exception:
+        mode = "think-tags"
+    if mode in ("legacy", "current"):
+        if reasoning_summary_text:
+            message["reasoning_summary"] = reasoning_summary_text
+        if reasoning_full_text:
+            message["reasoning"] = reasoning_full_text
+        return message
+    # Both remaining modes use the same combined text: summary first, then the
+    # full reasoning, skipping anything that is not a non-blank string.
+    pieces = [
+        text
+        for text in (reasoning_summary_text, reasoning_full_text)
+        if isinstance(text, str) and text.strip()
+    ]
+    combined = "\n\n".join(pieces)
+    if mode == "o3":
+        if combined:
+            message["reasoning"] = {"content": [{"type": "text", "text": combined}]}
+        return message
+    if combined:
+        existing = message.get("content") or ""
+        # Non-string content (e.g. a list of parts) is left untouched.
+        if isinstance(existing, str):
+            message["content"] = f"<think>{combined}</think>" + existing
+    return message

build/lib/chatmock/responses_api.py ADDED Viewed

	@@ -0,0 +1,243 @@

+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, Iterator, List
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import ServiceTierResolution, resolve_service_tier
+from .model_registry import (
+    allowed_efforts_for_model,
+    extract_reasoning_from_model_name,
+    normalize_model_name,
+    uses_codex_instructions,
+)
+from .reasoning import build_reasoning_param
+from .session import ensure_session_id
+# Deliberately not frozen: a frozen dataclass rejects every attribute
+# assignment, including ``exc.__traceback__ = ...``, which contextlib performs
+# when re-raising through a generator-based context manager. ``eq=False`` keeps
+# the identity-based equality and hashing of ordinary exceptions.
+@dataclass(eq=False)
+class ResponsesRequestError(Exception):
+    """A client request that cannot be normalized.
+    Attributes:
+        message: Human-readable description; also the ``str()`` of the error.
+        status_code: HTTP status to answer with (400 by default).
+        code: Optional machine-readable error code.
+    """
+    message: str
+    status_code: int = 400
+    code: str | None = None
+    def __str__(self) -> str:
+        return self.message
+@dataclass(frozen=True)
+class NormalizedResponsesRequest:
+    """Result of ``normalize_responses_payload``: the rewritten payload plus the values derived along the way."""
+    # Shallow copy of the client payload with the normalized fields applied.
+    payload: Dict[str, Any]
+    # The ``model`` string exactly as the client sent it; None when absent or not a string.
+    requested_model: str | None
+    # Model id produced by ``normalize_model_name``.
+    normalized_model: str
+    # Session id returned by ``ensure_session_id``.
+    session_id: str
+    # Outcome of ``resolve_service_tier`` for this request.
+    service_tier_resolution: ServiceTierResolution
+def instructions_for_model(config: Dict[str, Any], model: str) -> str:
+    """Pick the instruction text for ``model``.
+    Codex-style models get the Codex instructions (config value first, module
+    default otherwise) when that text is a non-blank string; every other case
+    gets the base instructions from ``config`` or the module default.
+    """
+    if uses_codex_instructions(model):
+        codex_text = config.get("GPT5_CODEX_INSTRUCTIONS") or GPT5_CODEX_INSTRUCTIONS
+        if isinstance(codex_text, str) and codex_text.strip():
+            return codex_text
+    return config.get("BASE_INSTRUCTIONS", BASE_INSTRUCTIONS)
+def extract_client_session_id(headers: Any) -> str | None:
+    """Return the first truthy of ``X-Session-Id`` / ``session_id``, else ``None``.
+    Any error while reading ``headers`` (for example ``None``) also yields ``None``.
+    """
+    try:
+        for header_name in ("X-Session-Id", "session_id"):
+            candidate = headers.get(header_name)
+            if candidate:
+                return candidate
+    except Exception:
+        return None
+    return None
+def _input_items_for_session(raw_input: Any) -> List[Dict[str, Any]]:
+    """Coerce a Responses ``input`` value into a list of item dicts.
+    A list keeps only its dict members, a dict becomes a one-item list, a
+    non-blank string becomes a single user message, and anything else is ``[]``.
+    """
+    if isinstance(raw_input, dict):
+        return [raw_input]
+    if isinstance(raw_input, list):
+        return [entry for entry in raw_input if isinstance(entry, dict)]
+    if not (isinstance(raw_input, str) and raw_input.strip()):
+        return []
+    user_message = {
+        "type": "message",
+        "role": "user",
+        "content": [{"type": "input_text", "text": raw_input}],
+    }
+    return [user_message]
+def canonicalize_responses_input(raw_input: Any) -> Any:
+    """Normalize a Responses ``input`` value into list form where possible.
+    Lists keep only their dict members, a dict is wrapped in a list, a string
+    becomes a single user message (or ``[]`` when blank); any other value is
+    returned unchanged.
+    """
+    if isinstance(raw_input, dict):
+        return [raw_input]
+    if isinstance(raw_input, list):
+        return [entry for entry in raw_input if isinstance(entry, dict)]
+    if not isinstance(raw_input, str):
+        return raw_input
+    if not raw_input.strip():
+        return []
+    return [
+        {
+            "type": "message",
+            "role": "user",
+            "content": [{"type": "input_text", "text": raw_input}],
+        }
+    ]
+def normalize_responses_payload(
+    payload: Dict[str, Any],
+    *,
+    config: Dict[str, Any],
+    client_session_id: str | None = None,
+) -> NormalizedResponsesRequest:
+    """Build a ``NormalizedResponsesRequest`` from a client Responses payload.
+    Works on a shallow copy of ``payload``; the caller's dict is not reassigned.
+    Args:
+        payload: The request body as sent by the client.
+        config: Settings mapping read for DEBUG_MODEL, REASONING_EFFORT,
+            REASONING_SUMMARY, DEFAULT_WEB_SEARCH, FAST_MODE and the
+            instruction texts.
+        client_session_id: Session id supplied by the client, if any; passed
+            through to ``ensure_session_id``.
+    Raises:
+        ResponsesRequestError: when service-tier resolution reports an error.
+    """
+    requested_model = payload.get("model") if isinstance(payload.get("model"), str) else None
+    normalized_model = normalize_model_name(requested_model, config.get("DEBUG_MODEL"))
+    normalized = dict(payload)
+    normalized["model"] = normalized_model
+    # max_output_tokens is always dropped from the outgoing payload.
+    normalized.pop("max_output_tokens", None)
+    if "input" in normalized:
+        normalized["input"] = canonicalize_responses_input(normalized.get("input"))
+    # Default to not storing the response unless the client chose explicitly.
+    if "store" not in normalized:
+        normalized["store"] = False
+    # Client-supplied instructions win; otherwise use the per-model defaults.
+    instructions = normalized.get("instructions")
+    if not isinstance(instructions, str) or not instructions.strip():
+        instructions = instructions_for_model(config, normalized_model)
+        normalized["instructions"] = instructions
+    reasoning_effort = config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = config.get("REASONING_SUMMARY", "auto")
+    # An explicit ``reasoning`` dict takes precedence over an effort suffix in
+    # the requested model name; the suffix is consulted only when no dict is given.
+    reasoning_overrides = (
+        normalized.get("reasoning")
+        if isinstance(normalized.get("reasoning"), dict)
+        else extract_reasoning_from_model_name(requested_model)
+    )
+    normalized["reasoning"] = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(normalized_model),
+    )
+    # Keep the client's string ``include`` entries and always request
+    # reasoning.encrypted_content in addition.
+    include = normalized.get("include")
+    include_list = [item for item in include if isinstance(item, str)] if isinstance(include, list) else []
+    if "reasoning.encrypted_content" not in include_list:
+        include_list.append("reasoning.encrypted_content")
+    normalized["include"] = include_list
+    # Inject the web_search tool only when the client sent no tools, the server
+    # default enables it, and tool_choice is not the string "none".
+    tools = normalized.get("tools")
+    if (not isinstance(tools, list) or not tools) and bool(config.get("DEFAULT_WEB_SEARCH")):
+        tool_choice = normalized.get("tool_choice")
+        if not (isinstance(tool_choice, str) and tool_choice.strip().lower() == "none"):
+            normalized["tools"] = [{"type": "web_search"}]
+    service_tier_resolution = resolve_service_tier(
+        normalized_model,
+        request_fast_mode=normalized.get("fast_mode"),
+        request_service_tier=normalized.get("service_tier"),
+        server_fast_mode=bool(config.get("FAST_MODE")),
+    )
+    if service_tier_resolution.error_message:
+        raise ResponsesRequestError(service_tier_resolution.error_message)
+    if service_tier_resolution.service_tier is None:
+        normalized.pop("service_tier", None)
+    else:
+        normalized["service_tier"] = service_tier_resolution.service_tier
+    # fast_mode has been folded into the service-tier decision above, so it is
+    # removed from the outgoing payload.
+    normalized.pop("fast_mode", None)
+    input_items = _input_items_for_session(normalized.get("input"))
+    session_id = ensure_session_id(instructions, input_items, client_session_id)
+    # Use the session id as the prompt cache key unless the client set one.
+    prompt_cache_key = normalized.get("prompt_cache_key")
+    if not isinstance(prompt_cache_key, str) or not prompt_cache_key.strip():
+        normalized["prompt_cache_key"] = session_id
+    return NormalizedResponsesRequest(
+        payload=normalized,
+        requested_model=requested_model,
+        normalized_model=normalized_model,
+        session_id=session_id,
+        service_tier_resolution=service_tier_resolution,
+    )
+def iter_sse_event_payloads(upstream: Any) -> Iterator[Dict[str, Any]]:
+    """Yield the JSON object carried by each ``data: `` line of an SSE stream.
+    Blank lines, non-``data: `` lines, empty payloads, invalid JSON and JSON
+    values that are not objects are skipped. A ``[DONE]`` payload ends the
+    iteration.
+    """
+    prefix = "data: "
+    for raw in upstream.iter_lines(decode_unicode=False):
+        if not raw:
+            continue
+        if isinstance(raw, (bytes, bytearray)):
+            text = raw.decode("utf-8", errors="ignore")
+        else:
+            text = raw
+        if not text.startswith(prefix):
+            continue
+        body = text[len(prefix) :].strip()
+        if body == "[DONE]":
+            break
+        if not body:
+            continue
+        try:
+            parsed = json.loads(body)
+        except Exception:
+            continue
+        if isinstance(parsed, dict):
+            yield parsed
+def aggregate_response_from_sse(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> tuple[Dict[str, Any] | None, Dict[str, Any] | None]:
+    """Consume an SSE stream and return ``(last_response, error)``.
+    ``last_response`` is the most recent ``response`` object seen before the
+    stream completed or failed. ``error`` is set only for ``response.failed``.
+    ``on_event`` (when callable) sees every event; its exceptions are ignored.
+    ``upstream`` is always closed.
+    """
+    latest_response: Dict[str, Any] | None = None
+    failure: Dict[str, Any] | None = None
+    notify = on_event if callable(on_event) else None
+    try:
+        for evt in iter_sse_event_payloads(upstream):
+            if notify is not None:
+                try:
+                    notify(evt)
+                except Exception:
+                    # Observer failures must not interrupt aggregation.
+                    pass
+            candidate = evt.get("response")
+            if isinstance(candidate, dict):
+                latest_response = candidate
+            event_type = evt.get("type")
+            if event_type == "response.completed":
+                break
+            if event_type == "response.failed":
+                upstream_error = candidate.get("error") if isinstance(candidate, dict) else None
+                if isinstance(upstream_error, dict):
+                    failure = {"error": upstream_error}
+                else:
+                    failure = {"error": {"message": "response.failed"}}
+                break
+    finally:
+        upstream.close()
+    return latest_response, failure
+def stream_upstream_bytes(
+    upstream: Any,
+    *,
+    on_event: Any | None = None,
+) -> Iterable[bytes]:
+    """Relay upstream chunks unchanged while optionally observing SSE events.
+    Every non-empty chunk is yielded exactly as received. When ``on_event`` is
+    callable, the bytes are also reassembled into lines and each ``data: ``
+    line holding a JSON object is passed to it; observer errors are ignored.
+    ``upstream`` is always closed.
+    """
+    pending = b""
+    notify = on_event if callable(on_event) else None
+    marker = b"data: "
+    try:
+        for chunk in upstream.iter_content(chunk_size=None):
+            if not chunk:
+                continue
+            if notify is not None:
+                pending += chunk if isinstance(chunk, bytes) else str(chunk).encode("utf-8", errors="ignore")
+                # Everything before the last newline is complete; the remainder
+                # waits for the next chunk.
+                *complete_lines, pending = pending.split(b"\n")
+                for raw_line in complete_lines:
+                    line = raw_line.rstrip(b"\r")
+                    if not line.startswith(marker):
+                        continue
+                    data = line[len(marker) :].strip()
+                    if not data or data == b"[DONE]":
+                        continue
+                    try:
+                        evt = json.loads(data.decode("utf-8", errors="ignore"))
+                    except Exception:
+                        evt = None
+                    if not isinstance(evt, dict):
+                        continue
+                    try:
+                        notify(evt)
+                    except Exception:
+                        pass
+            yield chunk
+    finally:
+        upstream.close()

build/lib/chatmock/routes_ollama.py ADDED Viewed

	@@ -0,0 +1,585 @@

+from __future__ import annotations
+import json
+import datetime
+import time
+from typing import Any, Dict, List
+from flask import Blueprint, Response, current_app, jsonify, make_response, request, stream_with_context
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
+from .limits import record_rate_limits_from_response
+from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import instructions_for_model
+from .reasoning import (
+    allowed_efforts_for_model,
+    build_reasoning_param,
+    extract_reasoning_from_model_name,
+)
+from .transform import convert_ollama_messages, normalize_ollama_tools
+from .upstream import normalize_model_name, start_upstream_request
+from .utils import convert_chat_messages_to_responses_input, convert_tools_chat_to_responses
+# Blueprint on which this module's /api/* routes are registered.
+ollama_bp = Blueprint("ollama", __name__)
+def _log_json(prefix: str, payload: Any) -> None:
+    """Print ``prefix`` followed by ``payload`` as indented JSON.
+    Falls back to the payload's plain string form when it cannot be
+    serialized; never raises.
+    """
+    try:
+        rendered = json.dumps(payload, indent=2, ensure_ascii=False)
+        print(f"{prefix}\n{rendered}")
+        return
+    except Exception:
+        pass
+    try:
+        print(f"{prefix}\n{payload}")
+    except Exception:
+        pass
+def _wrap_stream_logging(label: str, iterator, enabled: bool):
+    """Return ``iterator`` itself, or a generator that prints each chunk first.
+    With ``enabled`` false the original iterator is returned untouched.
+    Otherwise each chunk is printed under ``label`` (bytes decoded as UTF-8
+    with replacement) and then yielded unchanged; print failures are ignored.
+    """
+    if not enabled:
+        return iterator
+    def _logged():
+        for chunk in iterator:
+            try:
+                if isinstance(chunk, (bytes, bytearray)):
+                    text = chunk.decode("utf-8", errors="replace")
+                else:
+                    text = str(chunk)
+                print(f"{label}\n{text}")
+            except Exception:
+                pass
+            yield chunk
+    return _logged()
+@ollama_bp.route("/api/version", methods=["GET"])
+def ollama_version() -> Response:
+    """Answer ``GET /api/version`` with the configured version string.
+    Falls back to ``"0.12.10"`` when OLLAMA_VERSION is missing, blank or not a
+    string. CORS headers are added without overriding any already present.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    if verbose:
+        print("IN GET /api/version")
+    version = current_app.config.get("OLLAMA_VERSION", "0.12.10")
+    if not (isinstance(version, str) and version.strip()):
+        version = "0.12.10"
+    payload = {"version": version}
+    resp = make_response(jsonify(payload), 200)
+    for header_name, header_value in build_cors_headers().items():
+        resp.headers.setdefault(header_name, header_value)
+    if verbose:
+        _log_json("OUT GET /api/version", payload)
+    return resp
+def _instructions_for_model(model: str) -> str:
+    """Resolve instructions for ``model`` using the current Flask app's config."""
+    app_config = current_app.config
+    return instructions_for_model(app_config, model)
+# Fixed placeholder timing/eval statistics.
+# NOTE(review): presumably merged into Ollama-style responses, with the *_duration values in nanoseconds — confirm against the consumers further down this module.
+_OLLAMA_FAKE_EVAL = {
+    "total_duration": 8497226791,
+    "load_duration": 1747193958,
+    "prompt_eval_count": 24,
+    "prompt_eval_duration": 269219750,
+    "eval_count": 247,
+    "eval_duration": 6413802458,
+}
+@ollama_bp.route("/api/tags", methods=["GET"])
+def ollama_tags() -> Response:
+    """Answer ``GET /api/tags`` with one entry per public model id.
+    Effort variants are included when EXPOSE_REASONING_MODELS is set. Apart
+    from ``name`` / ``model``, every field is a fixed placeholder value.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    if verbose:
+        print("IN GET /api/tags")
+    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
+    models = [
+        {
+            "name": model_id,
+            "model": model_id,
+            "modified_at": "2023-10-01T00:00:00Z",
+            "size": 815319791,
+            "digest": "8648f39daa8fbf5b18c7b4e6a8fb4990c692751d49917417b8842ca5758e7ffc",
+            "details": {
+                "parent_model": "",
+                "format": "gguf",
+                "family": "llama",
+                "families": ["llama"],
+                "parameter_size": "8.0B",
+                "quantization_level": "Q4_0",
+            },
+        }
+        for model_id in list_public_models(expose_reasoning_models=expose_variants)
+    ]
+    payload = {"models": models}
+    resp = make_response(jsonify(payload), 200)
+    for header_name, header_value in build_cors_headers().items():
+        resp.headers.setdefault(header_name, header_value)
+    if verbose:
+        _log_json("OUT GET /api/tags", payload)
+    return resp
+@ollama_bp.route("/api/show", methods=["POST"])
+def ollama_show() -> Response:
+    verbose = bool(current_app.config.get("VERBOSE"))
+    raw_body = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /api/show\n" + raw_body)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw_body) if raw_body else (request.get_json(silent=True) or {})
+    except Exception:
+        payload = request.get_json(silent=True) or {}
+    model = payload.get("model")
+    if not isinstance(model, str) or not model.strip():
+        err = {"error": "Model not found"}
+        if verbose:
+            _log_json("OUT POST /api/show", err)
+        return jsonify(err), 400
+    v1_show_response = {
+        "modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llava:latest\n\nFROM /models/blobs/sha256:placeholder\nTEMPLATE \"\"\"{{ .System }}\nUSER: {{ .Prompt }}\nASSISTANT: \"\"\"\nPARAMETER num_ctx 100000\nPARAMETER stop \"</s>\"\nPARAMETER stop \"USER:\"\nPARAMETER stop \"ASSISTANT:\"",
+        "parameters": "num_keep 24\nstop \"<|start_header_id|>\"\nstop \"<|end_header_id|>\"\nstop \"<|eot_id|>\"",
+        "template": "{{ if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>\n\n{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ .Response }}<|eot_id|>",
+        "details": {
+            "parent_model": "",
+            "format": "gguf",
+            "family": "llama",
+            "families": ["llama"],
+            "parameter_size": "8.0B",
+            "quantization_level": "Q4_0",
+        },
+        "model_info": {
+            "general.architecture": "llama",
+            "general.file_type": 2,
+            "llama.context_length": 2000000,
+        },
+        "capabilities": ["completion", "vision", "tools", "thinking"],
+    }
+    if verbose:
+        _log_json("OUT POST /api/show", v1_show_response)
+    resp = make_response(jsonify(v1_show_response), 200)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+@ollama_bp.route("/api/chat", methods=["POST"])
+def ollama_chat() -> Response:
+    verbose = bool(current_app.config.get("VERBOSE"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+    reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
+    try:
+        raw = request.get_data(cache=True, as_text=True) or ""
+        if verbose:
+            print("IN POST /api/chat\n" + (raw if isinstance(raw, str) else ""))
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": "Invalid JSON body"}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+    model = payload.get("model")
+    raw_messages = payload.get("messages")
+    messages = convert_ollama_messages(
+        raw_messages, payload.get("images") if isinstance(payload.get("images"), list) else None
+    )
+    if isinstance(messages, list):
+        sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
+        if isinstance(sys_idx, int):
+            sys_msg = messages.pop(sys_idx)
+            content = sys_msg.get("content") if isinstance(sys_msg, dict) else ""
+            messages.insert(0, {"role": "user", "content": content})
+    stream_req = payload.get("stream")
+    if stream_req is None:
+        stream_req = True
+    stream_req = bool(stream_req)
+    tools_req = payload.get("tools") if isinstance(payload.get("tools"), list) else []
+    tools_responses = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
+    tool_choice = payload.get("tool_choice", "auto")
+    parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+    # Passthrough Responses API tools (web_search) via ChatMock extension fields
+    extra_tools: List[Dict[str, Any]] = []
+    had_responses_tools = False
+    rt_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+    if isinstance(rt_payload, list):
+        for _t in rt_payload:
+            if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+                continue
+            if _t.get("type") not in ("web_search", "web_search_preview"):
+                err = {"error": "Only web_search/web_search_preview are supported in responses_tools"}
+                if verbose:
+                    _log_json("OUT POST /api/chat", err)
+                return jsonify(err), 400
+            extra_tools.append(_t)
+        if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+            rtc = payload.get("responses_tool_choice")
+            if not (isinstance(rtc, str) and rtc == "none"):
+                extra_tools = [{"type": "web_search"}]
+        if extra_tools:
+            import json as _json
+            MAX_TOOLS_BYTES = 32768
+            try:
+                size = len(_json.dumps(extra_tools))
+            except Exception:
+                size = 0
+            if size > MAX_TOOLS_BYTES:
+                err = {"error": "responses_tools too large"}
+                if verbose:
+                    _log_json("OUT POST /api/chat", err)
+                return jsonify(err), 400
+            had_responses_tools = True
+            tools_responses = (tools_responses or []) + extra_tools
+    rtc = payload.get("responses_tool_choice")
+    if isinstance(rtc, str) and rtc in ("auto", "none"):
+        tool_choice = rtc
+    if not isinstance(model, str) or not isinstance(messages, list) or not messages:
+        err = {"error": "Invalid request format"}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+    input_items = convert_chat_messages_to_responses_input(messages)
+    model_reasoning = extract_reasoning_from_model_name(model)
+    normalized_model = normalize_model_name(model, current_app.config.get("DEBUG_MODEL"))
+    service_tier_resolution = resolve_service_tier(
+        normalized_model,
+        request_fast_mode=payload.get("fast_mode"),
+        request_service_tier=payload.get("service_tier"),
+        server_fast_mode=bool(current_app.config.get("FAST_MODE")),
+    )
+    if service_tier_resolution.warning_message and verbose:
+        print(f"[FastMode] {service_tier_resolution.warning_message}")
+    if service_tier_resolution.error_message:
+        err = {"error": service_tier_resolution.error_message}
+        if verbose:
+            _log_json("OUT POST /api/chat", err)
+        return jsonify(err), 400
+    upstream, error_resp = start_upstream_request(
+        normalized_model,
+        input_items,
+        instructions=_instructions_for_model(normalized_model),
+        tools=tools_responses,
+        tool_choice=tool_choice,
+        parallel_tool_calls=parallel_tool_calls,
+        reasoning_param=build_reasoning_param(
+            reasoning_effort,
+            reasoning_summary,
+            model_reasoning,
+            allowed_efforts=allowed_efforts_for_model(model),
+        ),
+        service_tier=service_tier_resolution.service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /api/chat", parsed)
+            except Exception:
+                pass
+        return error_resp
+    record_rate_limits_from_response(upstream)
+    if upstream.status_code >= 400:
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        if had_responses_tools:
+            if verbose:
+                print("[Passthrough] Upstream rejected tools; retrying without extras (args redacted)")
+            base_tools_only = convert_tools_chat_to_responses(normalize_ollama_tools(tools_req))
+            safe_choice = payload.get("tool_choice", "auto")
+            upstream2, err2 = start_upstream_request(
+                normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
+                input_items,
+                instructions=BASE_INSTRUCTIONS,
+                tools=base_tools_only,
+                tool_choice=safe_choice,
+                parallel_tool_calls=parallel_tool_calls,
+                reasoning_param=build_reasoning_param(
+                    reasoning_effort,
+                    reasoning_summary,
+                    model_reasoning,
+                    allowed_efforts=allowed_efforts_for_model(model),
+                ),
+                service_tier=service_tier_resolution.service_tier,
+            )
+            record_rate_limits_from_response(upstream2)
+            if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+                upstream = upstream2
+            else:
+                err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error"), "code": "RESPONSES_TOOLS_REJECTED"}}
+                if verbose:
+                    _log_json("OUT POST /api/chat", err)
+                return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
+        else:
+            if verbose:
+                print("/api/chat upstream error status=", upstream.status_code, " body:", json.dumps(err_body)[:2000])
+            err = {"error": (err_body.get("error", {}) or {}).get("message", "Upstream error")}
+            if verbose:
+                _log_json("OUT POST /api/chat", err)
+            return jsonify(err), upstream.status_code
+    created_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
+    model_out = model if isinstance(model, str) and model.strip() else normalized_model
+    if stream_req:
+        def _gen():
+            compat = (current_app.config.get("REASONING_COMPAT", "think-tags") or "think-tags").strip().lower()
+            think_open = False
+            think_closed = False
+            saw_any_summary = False
+            pending_summary_paragraph = False
+            full_parts: List[str] = []
+            try:
+                for raw_line in upstream.iter_lines(decode_unicode=False):
+                    if not raw_line:
+                        continue
+                    line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+                    if not line.startswith("data: "):
+                        continue
+                    data = line[len("data: "):].strip()
+                    if not data:
+                        continue
+                    if data == "[DONE]":
+                        break
+                    try:
+                        evt = json.loads(data)
+                    except Exception:
+                        continue
+                    kind = evt.get("type")
+                    if kind == "response.reasoning_summary_part.added":
+                        if compat in ("think-tags", "o3"):
+                            if saw_any_summary:
+                                pending_summary_paragraph = True
+                            else:
+                                saw_any_summary = True
+                    elif kind in ("response.reasoning_summary_text.delta", "response.reasoning_text.delta"):
+                        delta_txt = evt.get("delta") or ""
+                        if compat == "o3":
+                            if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "\n"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append("\n")
+                                pending_summary_paragraph = False
+                            if delta_txt:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": delta_txt},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append(delta_txt)
+                        elif compat == "think-tags":
+                            if not think_open and not think_closed:
+                                yield (
+                                    json.dumps(
+                                        {
+                                            "model": model_out,
+                                            "created_at": created_at,
+                                            "message": {"role": "assistant", "content": "<think>"},
+                                            "done": False,
+                                        }
+                                    )
+                                    + "\n"
+                                )
+                                full_parts.append("<think>")
+                                think_open = True
+                            if think_open and not think_closed:
+                                if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": "\n"},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                                    full_parts.append("\n")
+                                    pending_summary_paragraph = False
+                                if delta_txt:
+                                    yield (
+                                        json.dumps(
+                                            {
+                                                "model": model_out,
+                                                "created_at": created_at,
+                                                "message": {"role": "assistant", "content": delta_txt},
+                                                "done": False,
+                                            }
+                                        )
+                                        + "\n"
+                                    )
+                                    full_parts.append(delta_txt)
+                        else:
+                            pass
+                    elif kind == "response.output_text.delta":
+                        delta = evt.get("delta") or ""
+                        if compat == "think-tags" and think_open and not think_closed:
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": "</think>"},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                            full_parts.append("</think>")
+                            think_open = False
+                            think_closed = True
+                        if delta:
+                            yield (
+                                json.dumps(
+                                    {
+                                        "model": model_out,
+                                        "created_at": created_at,
+                                        "message": {"role": "assistant", "content": delta},
+                                        "done": False,
+                                    }
+                                )
+                                + "\n"
+                            )
+                            full_parts.append(delta)
+                    elif kind == "response.completed":
+                        break
+            finally:
+                upstream.close()
+                if compat == "think-tags" and think_open and not think_closed:
+                    yield (
+                        json.dumps(
+                            {
+                                "model": model_out,
+                                "created_at": created_at,
+                                "message": {"role": "assistant", "content": "</think>"},
+                                "done": False,
+                            }
+                        )
+                        + "\n"
+                    )
+                    full_parts.append("</think>")
+                done_obj = {
+                    "model": model_out,
+                    "created_at": created_at,
+                    "message": {"role": "assistant", "content": ""},
+                    "done": True,
+                }
+                done_obj.update(_OLLAMA_FAKE_EVAL)
+                yield json.dumps(done_obj) + "\n"
+        if verbose:
+            print("OUT POST /api/chat (streaming response)")
+        stream_iter = stream_with_context(_gen())
+        stream_iter = _wrap_stream_logging("STREAM OUT /api/chat", stream_iter, verbose)
+        resp = current_app.response_class(
+            stream_iter,
+            status=200,
+            mimetype="application/x-ndjson",
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    full_text = ""
+    reasoning_summary_text = ""
+    reasoning_full_text = ""
+    tool_calls: List[Dict[str, Any]] = []
+    try:
+        for raw in upstream.iter_lines(decode_unicode=False):
+            if not raw:
+                continue
+            line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            kind = evt.get("type")
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_summary_text.delta":
+                reasoning_summary_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_text.delta":
+                reasoning_full_text += evt.get("delta") or ""
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and item.get("type") == "function_call":
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ""
+                    args = item.get("arguments") or ""
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        tool_calls.append(
+                            {
+                                "id": call_id,
+                                "type": "function",
+                                "function": {"name": name, "arguments": args},
+                            }
+                        )
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+    if (current_app.config.get("REASONING_COMPAT", "think-tags") or "think-tags").strip().lower() == "think-tags":
+        rtxt_parts = []
+        if isinstance(reasoning_summary_text, str) and reasoning_summary_text.strip():
+            rtxt_parts.append(reasoning_summary_text)
+        if isinstance(reasoning_full_text, str) and reasoning_full_text.strip():
+            rtxt_parts.append(reasoning_full_text)
+        rtxt = "\n\n".join([p for p in rtxt_parts if p])
+        if rtxt:
+            full_text = f"<think>{rtxt}</think>" + (full_text or "")
+    out_json = {
+        "model": normalize_model_name(model, current_app.config.get("DEBUG_MODEL")),
+        "created_at": created_at,
+        "message": {"role": "assistant", "content": full_text, **({"tool_calls": tool_calls} if tool_calls else {})},
+        "done": True,
+        "done_reason": "stop",
+    }
+    out_json.update(_OLLAMA_FAKE_EVAL)
+    if verbose:
+        _log_json("OUT POST /api/chat", out_json)
+    resp = make_response(jsonify(out_json), 200)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp

build/lib/chatmock/routes_openai.py ADDED Viewed

	@@ -0,0 +1,738 @@

+from __future__ import annotations
+import json
+import time
+from typing import Any, Dict, List
+from flask import Blueprint, Response, current_app, jsonify, make_response, request
+from .config import BASE_INSTRUCTIONS, GPT5_CODEX_INSTRUCTIONS
+from .fast_mode import resolve_service_tier
+from .limits import record_rate_limits_from_response
+from .http import build_cors_headers
+from .model_registry import list_public_models, uses_codex_instructions
+from .responses_api import (
+    ResponsesRequestError,
+    aggregate_response_from_sse,
+    extract_client_session_id,
+    instructions_for_model,
+    normalize_responses_payload,
+    stream_upstream_bytes,
+)
+from .reasoning import (
+    allowed_efforts_for_model,
+    apply_reasoning_to_message,
+    build_reasoning_param,
+    extract_reasoning_from_model_name,
+)
+from .session import (
+    clear_responses_reuse_state,
+    note_responses_final_response,
+    note_responses_stream_event,
+    prepare_responses_request_for_session,
+)
+from .upstream import normalize_model_name, start_upstream_raw_request, start_upstream_request
+from .utils import (
+    convert_chat_messages_to_responses_input,
+    convert_tools_chat_to_responses,
+    sse_translate_chat,
+    sse_translate_text,
+)
+openai_bp = Blueprint("openai", __name__)  # Flask blueprint the routes in this module (e.g. /v1/chat/completions) register on
+def _log_json(prefix: str, payload: Any) -> None:
+    try:
+        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+    except Exception:
+        try:
+            print(f"{prefix}\n{payload}")
+        except Exception:
+            pass
+def _wrap_stream_logging(label: str, iterator, enabled: bool):
+    if not enabled:
+        return iterator
+    def _gen():
+        for chunk in iterator:
+            try:
+                text = (
+                    chunk.decode("utf-8", errors="replace")
+                    if isinstance(chunk, (bytes, bytearray))
+                    else str(chunk)
+                )
+                print(f"{label}\n{text}")
+            except Exception:
+                pass
+            yield chunk
+    return _gen()
+def _instructions_for_model(model: str) -> str:
+    return instructions_for_model(current_app.config, model)
+def _service_tier_from_payload(
+    model: str,
+    payload: Dict[str, Any],
+    *,
+    verbose: bool = False,
+) -> tuple[str | None, Response | None]:
+    resolution = resolve_service_tier(
+        model,
+        request_fast_mode=payload.get("fast_mode"),
+        request_service_tier=payload.get("service_tier"),
+        server_fast_mode=bool(current_app.config.get("FAST_MODE")),
+    )
+    if resolution.warning_message and verbose:
+        print(f"[FastMode] {resolution.warning_message}")
+    if resolution.error_message:
+        err = {"error": {"message": resolution.error_message}}
+        if verbose:
+            _log_json("OUT POST service_tier resolution", err)
+        resp = make_response(jsonify(err), 400)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return None, resp
+    return resolution.service_tier, None
+@openai_bp.route("/v1/chat/completions", methods=["POST"])
+def chat_completions() -> Response:
+    verbose = bool(current_app.config.get("VERBOSE"))
+    verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+    reasoning_compat = current_app.config.get("REASONING_COMPAT", "think-tags")
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/chat/completions\n" + raw)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        try:
+            payload = json.loads(raw.replace("\r", "").replace("\n", ""))
+        except Exception:
+            err = {"error": {"message": "Invalid JSON body"}}
+            if verbose:
+                _log_json("OUT POST /v1/chat/completions", err)
+            return jsonify(err), 400
+    requested_model = payload.get("model")
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
+    messages = payload.get("messages")
+    if messages is None and isinstance(payload.get("prompt"), str):
+        messages = [{"role": "user", "content": payload.get("prompt") or ""}]
+    if messages is None and isinstance(payload.get("input"), str):
+        messages = [{"role": "user", "content": payload.get("input") or ""}]
+    if messages is None:
+        messages = []
+    if not isinstance(messages, list):
+        err = {"error": {"message": "Request must include messages: []"}}
+        if verbose:
+            _log_json("OUT POST /v1/chat/completions", err)
+        return jsonify(err), 400
+    if isinstance(messages, list):
+        sys_idx = next((i for i, m in enumerate(messages) if isinstance(m, dict) and m.get("role") == "system"), None)
+        if isinstance(sys_idx, int):
+            sys_msg = messages.pop(sys_idx)
+            content = sys_msg.get("content") if isinstance(sys_msg, dict) else ""
+            messages.insert(0, {"role": "user", "content": content})
+    is_stream = bool(payload.get("stream"))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
+    tools_responses = convert_tools_chat_to_responses(payload.get("tools"))
+    tool_choice = payload.get("tool_choice", "auto")
+    parallel_tool_calls = bool(payload.get("parallel_tool_calls", False))
+    responses_tools_payload = payload.get("responses_tools") if isinstance(payload.get("responses_tools"), list) else []
+    extra_tools: List[Dict[str, Any]] = []
+    had_responses_tools = False
+    if isinstance(responses_tools_payload, list):
+        for _t in responses_tools_payload:
+            if not (isinstance(_t, dict) and isinstance(_t.get("type"), str)):
+                continue
+            if _t.get("type") not in ("web_search", "web_search_preview"):
+                err = {
+                    "error": {
+                        "message": "Only web_search/web_search_preview are supported in responses_tools",
+                        "code": "RESPONSES_TOOL_UNSUPPORTED",
+                    }
+                }
+                if verbose:
+                    _log_json("OUT POST /v1/chat/completions", err)
+                return jsonify(err), 400
+            extra_tools.append(_t)
+        if not extra_tools and bool(current_app.config.get("DEFAULT_WEB_SEARCH")):
+            responses_tool_choice = payload.get("responses_tool_choice")
+            if not (isinstance(responses_tool_choice, str) and responses_tool_choice == "none"):
+                extra_tools = [{"type": "web_search"}]
+        if extra_tools:
+            import json as _json
+            MAX_TOOLS_BYTES = 32768
+            try:
+                size = len(_json.dumps(extra_tools))
+            except Exception:
+                size = 0
+            if size > MAX_TOOLS_BYTES:
+                err = {"error": {"message": "responses_tools too large", "code": "RESPONSES_TOOLS_TOO_LARGE"}}
+                if verbose:
+                    _log_json("OUT POST /v1/chat/completions", err)
+                return jsonify(err), 400
+            had_responses_tools = True
+            tools_responses = (tools_responses or []) + extra_tools
+    responses_tool_choice = payload.get("responses_tool_choice")
+    if isinstance(responses_tool_choice, str) and responses_tool_choice in ("auto", "none"):
+        tool_choice = responses_tool_choice
+    input_items = convert_chat_messages_to_responses_input(messages)
+    if not input_items and isinstance(payload.get("prompt"), str) and payload.get("prompt").strip():
+        input_items = [
+            {"type": "message", "role": "user", "content": [{"type": "input_text", "text": payload.get("prompt")}]}
+        ]
+    model_reasoning = extract_reasoning_from_model_name(requested_model)
+    reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
+    service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+    if tier_error is not None:
+        return tier_error
+    upstream, error_resp = start_upstream_request(
+        model,
+        input_items,
+        instructions=_instructions_for_model(model),
+        tools=tools_responses,
+        tool_choice=tool_choice,
+        parallel_tool_calls=parallel_tool_calls,
+        reasoning_param=reasoning_param,
+        service_tier=service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/chat/completions", parsed)
+            except Exception:
+                pass
+        return error_resp
+    record_rate_limits_from_response(upstream)
+    created = int(time.time())
+    if upstream.status_code >= 400:
+        try:
+            raw = upstream.content
+            err_body = json.loads(raw.decode("utf-8", errors="ignore")) if raw else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        if had_responses_tools:
+            if verbose:
+                print("[Passthrough] Upstream rejected tools; retrying without extra tools (args redacted)")
+            base_tools_only = convert_tools_chat_to_responses(payload.get("tools"))
+            safe_choice = payload.get("tool_choice", "auto")
+            upstream2, err2 = start_upstream_request(
+                model,
+                input_items,
+                instructions=BASE_INSTRUCTIONS,
+                tools=base_tools_only,
+                tool_choice=safe_choice,
+                parallel_tool_calls=parallel_tool_calls,
+                reasoning_param=reasoning_param,
+                service_tier=service_tier,
+            )
+            record_rate_limits_from_response(upstream2)
+            if err2 is None and upstream2 is not None and upstream2.status_code < 400:
+                upstream = upstream2
+            else:
+                err = {
+                    "error": {
+                        "message": (err_body.get("error", {}) or {}).get("message", "Upstream error"),
+                        "code": "RESPONSES_TOOLS_REJECTED",
+                    }
+                }
+                if verbose:
+                    _log_json("OUT POST /v1/chat/completions", err)
+                return jsonify(err), (upstream2.status_code if upstream2 is not None else upstream.status_code)
+        else:
+            if verbose:
+                print("Upstream error status=", upstream.status_code)
+            err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
+            if verbose:
+                _log_json("OUT POST /v1/chat/completions", err)
+            return jsonify(err), upstream.status_code
+    if is_stream:
+        if verbose:
+            print("OUT POST /v1/chat/completions (streaming response)")
+        stream_iter = sse_translate_chat(
+            upstream,
+            requested_model or model,
+            created,
+            verbose=verbose_obfuscation,
+            vlog=print if verbose_obfuscation else None,
+            reasoning_compat=reasoning_compat,
+            include_usage=include_usage,
+        )
+        stream_iter = _wrap_stream_logging("STREAM OUT /v1/chat/completions", stream_iter, verbose)
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    full_text = ""
+    reasoning_summary_text = ""
+    reasoning_full_text = ""
+    response_id = "chatcmpl"
+    tool_calls: List[Dict[str, Any]] = []
+    error_message: str | None = None
+    usage_obj: Dict[str, int] | None = None
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        for raw in upstream.iter_lines(decode_unicode=False):
+            if not raw:
+                continue
+            line = raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else raw
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data:
+                continue
+            if data == "[DONE]":
+                break
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            kind = evt.get("type")
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_summary_text.delta":
+                reasoning_summary_text += evt.get("delta") or ""
+            elif kind == "response.reasoning_text.delta":
+                reasoning_full_text += evt.get("delta") or ""
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and item.get("type") == "function_call":
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ""
+                    args = item.get("arguments") or ""
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        tool_calls.append(
+                            {
+                                "id": call_id,
+                                "type": "function",
+                                "function": {"name": name, "arguments": args},
+                            }
+                        )
+            elif kind == "response.failed":
+                error_message = evt.get("response", {}).get("error", {}).get("message", "response.failed")
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+    if error_message:
+        resp = make_response(jsonify({"error": {"message": error_message}}), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    message: Dict[str, Any] = {"role": "assistant", "content": full_text if full_text else None}
+    if tool_calls:
+        message["tool_calls"] = tool_calls
+    message = apply_reasoning_to_message(message, reasoning_summary_text, reasoning_full_text, reasoning_compat)
+    completion = {
+        "id": response_id or "chatcmpl",
+        "object": "chat.completion",
+        "created": created,
+        "model": requested_model or model,
+        "choices": [
+            {
+                "index": 0,
+                "message": message,
+                "finish_reason": "stop",
+            }
+        ],
+        **({"usage": usage_obj} if usage_obj else {}),
+    }
+    if verbose:
+        _log_json("OUT POST /v1/chat/completions", completion)
+    resp = make_response(jsonify(completion), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+@openai_bp.route("/v1/completions", methods=["POST"])
+def completions() -> Response:
+    verbose = bool(current_app.config.get("VERBOSE"))
+    verbose_obfuscation = bool(current_app.config.get("VERBOSE_OBFUSCATION"))
+    reasoning_effort = current_app.config.get("REASONING_EFFORT", "medium")
+    reasoning_summary = current_app.config.get("REASONING_SUMMARY", "auto")
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/completions\n" + raw)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": {"message": "Invalid JSON body"}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        return jsonify(err), 400
+    requested_model = payload.get("model")
+    model = normalize_model_name(requested_model, current_app.config.get("DEBUG_MODEL"))
+    prompt = payload.get("prompt")
+    if isinstance(prompt, list):
+        prompt = "".join([p if isinstance(p, str) else "" for p in prompt])
+    if not isinstance(prompt, str):
+        prompt = payload.get("suffix") or ""
+    stream_req = bool(payload.get("stream", False))
+    stream_options = payload.get("stream_options") if isinstance(payload.get("stream_options"), dict) else {}
+    include_usage = bool(stream_options.get("include_usage", False))
+    messages = [{"role": "user", "content": prompt or ""}]
+    input_items = convert_chat_messages_to_responses_input(messages)
+    model_reasoning = extract_reasoning_from_model_name(requested_model)
+    reasoning_overrides = payload.get("reasoning") if isinstance(payload.get("reasoning"), dict) else model_reasoning
+    reasoning_param = build_reasoning_param(
+        reasoning_effort,
+        reasoning_summary,
+        reasoning_overrides,
+        allowed_efforts=allowed_efforts_for_model(model),
+    )
+    service_tier, tier_error = _service_tier_from_payload(model, payload, verbose=verbose)
+    if tier_error is not None:
+        return tier_error
+    upstream, error_resp = start_upstream_request(
+        model,
+        input_items,
+        instructions=_instructions_for_model(model),
+        reasoning_param=reasoning_param,
+        service_tier=service_tier,
+    )
+    if error_resp is not None:
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/completions", parsed)
+            except Exception:
+                pass
+        return error_resp
+    record_rate_limits_from_response(upstream)
+    created = int(time.time())
+    if upstream.status_code >= 400:
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"raw": upstream.text}
+        except Exception:
+            err_body = {"raw": upstream.text}
+        err = {"error": {"message": (err_body.get("error", {}) or {}).get("message", "Upstream error")}}
+        if verbose:
+            _log_json("OUT POST /v1/completions", err)
+        return jsonify(err), upstream.status_code
+    if stream_req:
+        if verbose:
+            print("OUT POST /v1/completions (streaming response)")
+        stream_iter = sse_translate_text(
+            upstream,
+            requested_model or model,
+            created,
+            verbose=verbose_obfuscation,
+            vlog=(print if verbose_obfuscation else None),
+            include_usage=include_usage,
+        )
+        stream_iter = _wrap_stream_logging("STREAM OUT /v1/completions", stream_iter, verbose)
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    full_text = ""
+    response_id = "cmpl"
+    usage_obj: Dict[str, int] | None = None
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        for raw_line in upstream.iter_lines(decode_unicode=False):
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data or data == "[DONE]":
+                if data == "[DONE]":
+                    break
+                continue
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            mu = _extract_usage(evt)
+            if mu:
+                usage_obj = mu
+            kind = evt.get("type")
+            if kind == "response.output_text.delta":
+                full_text += evt.get("delta") or ""
+            elif kind == "response.completed":
+                break
+    finally:
+        upstream.close()
+    completion = {
+        "id": response_id or "cmpl",
+        "object": "text_completion",
+        "created": created,
+        "model": requested_model or model,
+        "choices": [
+            {"index": 0, "text": full_text, "finish_reason": "stop", "logprobs": None}
+        ],
+        **({"usage": usage_obj} if usage_obj else {}),
+    }
+    if verbose:
+        _log_json("OUT POST /v1/completions", completion)
+    resp = make_response(jsonify(completion), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+@openai_bp.route("/v1/responses", methods=["POST"])
+def responses_create() -> Response:
+    """Serve ``POST /v1/responses`` by proxying to the upstream Responses API.
+
+    The body is validated and normalized, registered with the per-session
+    tracking state, and always sent upstream with ``stream: true``. The
+    client's own ``stream`` flag only decides how the answer is delivered:
+    either the upstream bytes are relayed as an event stream, or the events
+    are aggregated into one JSON response object.
+
+    Error responses:
+        * 400 for a body that is not valid JSON or not a JSON object.
+        * ``exc.status_code`` when normalization raises ``ResponsesRequestError``.
+        * The upstream status code (with the upstream error body) on HTTP >= 400.
+        * 502 when aggregation yields an error or no completed response object.
+    """
+    verbose = bool(current_app.config.get("VERBOSE"))
+    raw = request.get_data(cache=True, as_text=True) or ""
+    if verbose:
+        try:
+            print("IN POST /v1/responses\n" + raw)
+        except Exception:
+            pass
+    try:
+        payload = json.loads(raw) if raw else {}
+    except Exception:
+        err = {"error": {"message": "Invalid JSON body"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), 400
+    if not isinstance(payload, dict):
+        err = {"error": {"message": "Request body must be a JSON object"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), 400
+    try:
+        normalized = normalize_responses_payload(
+            payload,
+            config=current_app.config,
+            client_session_id=extract_client_session_id(request.headers),
+        )
+    except ResponsesRequestError as exc:
+        # Normalization failures carry their own HTTP status and optional code.
+        err: Dict[str, Any] = {"error": {"message": str(exc)}}
+        if exc.code:
+            err["error"]["code"] = exc.code
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        return jsonify(err), exc.status_code
+    if normalized.service_tier_resolution.warning_message and verbose:
+        print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+    # allow_previous_response_id=False is passed explicitly here (the helper's
+    # default is True). NOTE(review): presumably this keeps the helper from
+    # rewriting the input into a delta on top of a cached response id - confirm
+    # against prepare_responses_request_for_session.
+    prepared = prepare_responses_request_for_session(
+        normalized.session_id,
+        normalized.payload,
+        allow_previous_response_id=False,
+    )
+    # Remember what the client asked for, then force streaming upstream.
+    stream_req = bool(prepared.payload.get("stream", False))
+    upstream_payload = dict(prepared.payload)
+    upstream_payload["stream"] = True
+    upstream, error_resp = start_upstream_raw_request(
+        upstream_payload,
+        session_id=normalized.session_id,
+        stream=True,
+    )
+    if error_resp is not None:
+        # The request never produced a usable response; drop cached session state.
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            try:
+                body = error_resp.get_data(as_text=True)
+                if body:
+                    try:
+                        parsed = json.loads(body)
+                    except Exception:
+                        parsed = body
+                    _log_json("OUT POST /v1/responses", parsed)
+            except Exception:
+                pass
+        return error_resp
+    record_rate_limits_from_response(upstream)
+    if upstream.status_code >= 400:
+        # Relay the upstream error body as-is (or wrap the raw text if it is
+        # not JSON), keeping the upstream status code.
+        try:
+            err_body = json.loads(upstream.content.decode("utf-8", errors="ignore")) if upstream.content else {"error": {"message": upstream.text}}
+        except Exception:
+            err_body = {"error": {"message": upstream.text or "Upstream error"}}
+        finally:
+            upstream.close()
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            _log_json("OUT POST /v1/responses", err_body)
+        resp = make_response(jsonify(err_body), upstream.status_code)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    if stream_req:
+        # Streaming client: relay upstream bytes, feeding every event to the
+        # session tracker along the way.
+        if verbose:
+            print("OUT POST /v1/responses (streaming response)")
+        stream_iter = _wrap_stream_logging(
+            "STREAM OUT /v1/responses",
+            stream_upstream_bytes(
+                upstream,
+                on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+            ),
+            verbose,
+        )
+        resp = Response(
+            stream_iter,
+            status=upstream.status_code,
+            mimetype="text/event-stream",
+            headers={"Cache-Control": "no-cache", "Connection": "keep-alive"},
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    # Non-streaming client. Upstream may still have answered with plain JSON
+    # despite the forced stream flag; handle that first.
+    content_type = upstream.headers.get("Content-Type", "")
+    if "application/json" in content_type.lower():
+        try:
+            body = upstream.json()
+        except Exception:
+            body = None
+        finally:
+            upstream.close()
+        if isinstance(body, dict):
+            note_responses_final_response(normalized.session_id, body)
+            if verbose:
+                _log_json("OUT POST /v1/responses", body)
+            resp = make_response(jsonify(body), upstream.status_code)
+            for k, v in build_cors_headers().items():
+                resp.headers.setdefault(k, v)
+            return resp
+        # NOTE(review): a JSON body that is not an object falls through to the
+        # SSE aggregation below on an already-closed response - confirm intended.
+    # Collapse the upstream event stream into a single response object.
+    response_obj, error_obj = aggregate_response_from_sse(
+        upstream,
+        on_event=lambda evt: note_responses_stream_event(normalized.session_id, evt),
+    )
+    if error_obj is not None:
+        clear_responses_reuse_state(normalized.session_id)
+        if verbose:
+            _log_json("OUT POST /v1/responses", error_obj)
+        resp = make_response(jsonify(error_obj), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    if response_obj is None:
+        clear_responses_reuse_state(normalized.session_id)
+        err = {"error": {"message": "Upstream response stream did not contain a completed response object"}}
+        if verbose:
+            _log_json("OUT POST /v1/responses", err)
+        resp = make_response(jsonify(err), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return resp
+    if verbose:
+        _log_json("OUT POST /v1/responses", response_obj)
+    resp = make_response(jsonify(response_obj), upstream.status_code)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp
+@openai_bp.route("/v1/models", methods=["GET"])
+def list_models() -> Response:
+    expose_variants = bool(current_app.config.get("EXPOSE_REASONING_MODELS"))
+    model_ids = list_public_models(expose_reasoning_models=expose_variants)
+    data = [{"id": mid, "object": "model", "owned_by": "owner"} for mid in model_ids]
+    models = {"object": "list", "data": data}
+    resp = make_response(jsonify(models), 200)
+    for k, v in build_cors_headers().items():
+        resp.headers.setdefault(k, v)
+    return resp

build/lib/chatmock/session.py ADDED Viewed

	@@ -0,0 +1,312 @@

+from __future__ import annotations
+import copy
+import hashlib
+import json
+import threading
+import uuid
+from dataclasses import dataclass, field
+from typing import Any, Dict, List
+# Single lock taken by the public functions of this module before they touch
+# any of the shared maps/lists below.
+_LOCK = threading.Lock()
+# Prompt-prefix fingerprint (SHA-256 hex) -> generated session UUID.
+_FINGERPRINT_TO_UUID: Dict[str, str] = {}
+# Fingerprints in insertion order; the oldest is evicted first.
+_ORDER: List[str] = []
+# Upper bound applied to both _ORDER and _RESPONSES_ORDER.
+_MAX_ENTRIES = 10000
+# Session id -> tracking state for Responses API requests.
+_RESPONSES_SESSION_STATE: Dict[str, "_ResponsesSessionState"] = {}
+# Session ids in insertion order; the oldest is evicted first.
+_RESPONSES_ORDER: List[str] = []
+@dataclass(frozen=True)
+class PreparedResponsesRequest:
+    """Immutable result of ``prepare_responses_request_for_session``."""
+    # Payload to send upstream; its input may have been trimmed to a delta.
+    payload: Dict[str, Any]
+    # Session id the request was prepared for.
+    session_id: str
+@dataclass
+class _ResponsesSessionState:
+    """Per-session bookkeeping for the last completed and the in-flight request."""
+    # Full payload of the most recent request whose response completed.
+    last_request_payload: Dict[str, Any] | None = None
+    # Id of the response produced for last_request_payload.
+    last_response_id: str | None = None
+    # Output items of that response, with "reasoning" items filtered out.
+    last_response_items: List[Dict[str, Any]] = field(default_factory=list)
+    # Full payload of the request currently awaiting completion.
+    inflight_request_payload: Dict[str, Any] | None = None
+    # True when the in-flight request's result should be promoted to "last_*".
+    inflight_track_result: bool = False
+    # Response id seen on the in-flight request's "response.created" event.
+    inflight_response_id: str | None = None
+    # Output items collected from "response.output_item.done" events so far.
+    inflight_response_items: List[Dict[str, Any]] = field(default_factory=list)
+def _canonicalize_first_user_message(input_items: List[Dict[str, Any]]) -> Dict[str, Any] | None:
+    """
+    Extract the first stable user message from Responses input items. Good use for a fingerprint for prompt caching.
+    """
+    for item in input_items:
+        if not isinstance(item, dict):
+            continue
+        if item.get("type") != "message":
+            continue
+        role = item.get("role")
+        if role != "user":
+            continue
+        content = item.get("content")
+        if not isinstance(content, list):
+            continue
+        norm_content = []
+        for part in content:
+            if not isinstance(part, dict):
+                continue
+            ptype = part.get("type")
+            if ptype == "input_text":
+                text = part.get("text") if isinstance(part.get("text"), str) else ""
+                if text:
+                    norm_content.append({"type": "input_text", "text": text})
+            elif ptype == "input_image":
+                url = part.get("image_url") if isinstance(part.get("image_url"), str) else None
+                if url:
+                    norm_content.append({"type": "input_image", "image_url": url})
+        if norm_content:
+            return {"type": "message", "role": "user", "content": norm_content}
+    return None
+def canonicalize_prefix(instructions: str | None, input_items: List[Dict[str, Any]]) -> str:
+    prefix: Dict[str, Any] = {}
+    if isinstance(instructions, str) and instructions.strip():
+        prefix["instructions"] = instructions.strip()
+    first_user = _canonicalize_first_user_message(input_items)
+    if first_user is not None:
+        prefix["first_user_message"] = first_user
+    return json.dumps(prefix, sort_keys=True, separators=(",", ":"))
+def _fingerprint(s: str) -> str:
+    return hashlib.sha256(s.encode("utf-8")).hexdigest()
+def _remember(fp: str, sid: str) -> None:
+    if fp in _FINGERPRINT_TO_UUID:
+        return
+    _FINGERPRINT_TO_UUID[fp] = sid
+    _ORDER.append(fp)
+    if len(_ORDER) > _MAX_ENTRIES:
+        oldest = _ORDER.pop(0)
+        _FINGERPRINT_TO_UUID.pop(oldest, None)
+def _remember_responses_session(session_id: str) -> _ResponsesSessionState:
+    state = _RESPONSES_SESSION_STATE.get(session_id)
+    if state is None:
+        state = _ResponsesSessionState()
+        _RESPONSES_SESSION_STATE[session_id] = state
+        _RESPONSES_ORDER.append(session_id)
+        if len(_RESPONSES_ORDER) > _MAX_ENTRIES:
+            oldest = _RESPONSES_ORDER.pop(0)
+            _RESPONSES_SESSION_STATE.pop(oldest, None)
+    return state
+def _request_without_input(payload: Dict[str, Any]) -> Dict[str, Any]:
+    clone = copy.deepcopy(payload)
+    clone["input"] = []
+    clone.pop("previous_response_id", None)
+    return clone
+def _input_list(payload: Dict[str, Any]) -> List[Dict[str, Any]] | None:
+    raw = payload.get("input")
+    if not isinstance(raw, list):
+        return None
+    return [item for item in copy.deepcopy(raw) if isinstance(item, dict)]
+def _conversation_output_items(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    reusable: List[Dict[str, Any]] = []
+    for item in items:
+        if not isinstance(item, dict):
+            continue
+        item_type = item.get("type")
+        if item_type == "reasoning":
+            continue
+        reusable.append(copy.deepcopy(item))
+    return reusable
+def _clear_reuse_state(state: _ResponsesSessionState) -> None:
+    state.last_request_payload = None
+    state.last_response_id = None
+    state.last_response_items = []
+    state.inflight_request_payload = None
+    state.inflight_track_result = False
+    state.inflight_response_id = None
+    state.inflight_response_items = []
+def _clear_inflight(state: _ResponsesSessionState) -> None:
+    state.inflight_request_payload = None
+    state.inflight_track_result = False
+    state.inflight_response_id = None
+    state.inflight_response_items = []
+def ensure_session_id(
+    instructions: str | None,
+    input_items: List[Dict[str, Any]],
+    client_supplied: str | None = None,
+) -> str:
+    if isinstance(client_supplied, str) and client_supplied.strip():
+        return client_supplied.strip()
+    canon = canonicalize_prefix(instructions, input_items)
+    fp = _fingerprint(canon)
+    with _LOCK:
+        if fp in _FINGERPRINT_TO_UUID:
+            return _FINGERPRINT_TO_UUID[fp]
+        sid = str(uuid.uuid4())
+        _remember(fp, sid)
+        return sid
+def prepare_responses_request_for_session(
+    session_id: str,
+    payload: Dict[str, Any],
+    *,
+    allow_previous_response_id: bool = True,
+) -> PreparedResponsesRequest:
+    """Register ``payload`` as the session's in-flight request and build the
+    payload to send upstream.
+
+    When reuse is allowed and the new request continues the last completed
+    one (same settings, and its input starts with the previous input followed
+    by the previous response's output items), the outbound input is cut down
+    to the new tail and ``previous_response_id`` is set to the cached id.
+
+    A request that already carries a non-blank ``previous_response_id`` is
+    passed through unchanged and the session's cached state is cleared.
+
+    ``payload`` itself is never mutated; both the stored and the returned
+    payloads are deep copies.
+    """
+    # full_payload is what gets remembered; outbound_payload is what gets sent.
+    full_payload = copy.deepcopy(payload)
+    outbound_payload = copy.deepcopy(payload)
+    explicit_previous_response_id = (
+        isinstance(full_payload.get("previous_response_id"), str)
+        and bool(full_payload.get("previous_response_id").strip())
+    )
+    with _LOCK:
+        state = _remember_responses_session(session_id)
+        if explicit_previous_response_id:
+            # The caller chains responses itself: forget our cache and do not
+            # track this request as in-flight.
+            _clear_reuse_state(state)
+            return PreparedResponsesRequest(
+                payload=outbound_payload,
+                session_id=session_id,
+            )
+        request_input = _input_list(full_payload)
+        # Reuse needs a cached completed exchange and identical request
+        # settings (everything except input / previous_response_id).
+        if (
+            allow_previous_response_id
+            and
+            state.last_request_payload is not None
+            and state.last_response_id
+            and request_input is not None
+            and _request_without_input(state.last_request_payload) == _request_without_input(full_payload)
+        ):
+            # Expected conversation so far: previous input + previous output.
+            baseline: List[Dict[str, Any]] = []
+            previous_input = _input_list(state.last_request_payload)
+            if previous_input is not None:
+                baseline.extend(previous_input)
+            baseline.extend(copy.deepcopy(state.last_response_items))
+            baseline_len = len(baseline)
+            if request_input[:baseline_len] == baseline and baseline_len <= len(request_input):
+                # Send only the items added after the cached exchange.
+                outbound_payload["input"] = copy.deepcopy(request_input[baseline_len:])
+                outbound_payload["previous_response_id"] = state.last_response_id
+        # Start tracking this request; any earlier in-flight data is overwritten.
+        state.inflight_request_payload = full_payload
+        state.inflight_track_result = True
+        state.inflight_response_id = None
+        state.inflight_response_items = []
+    return PreparedResponsesRequest(
+        payload=outbound_payload,
+        session_id=session_id,
+    )
+def note_responses_stream_event(session_id: str, event: Dict[str, Any]) -> None:
+    """Update the session's tracking state from one upstream stream event.
+
+    Handled event types:
+        * ``response.created`` - remember the in-flight response id.
+        * ``response.output_item.done`` - collect the finished output item.
+        * ``response.completed`` - promote the in-flight request/response to
+          the "last" slot (or clear that slot when the result cannot be
+          tracked), then reset the in-flight fields.
+        * ``response.failed`` / ``error`` - clear all state for the session.
+
+    Blank or non-string session ids, non-dict events, unknown sessions and
+    other event types are ignored.
+    """
+    if not isinstance(session_id, str) or not session_id.strip():
+        return
+    if not isinstance(event, dict):
+        return
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+        kind = event.get("type")
+        if kind == "response.created":
+            response = event.get("response")
+            if isinstance(response, dict) and isinstance(response.get("id"), str):
+                state.inflight_response_id = response.get("id")
+            return
+        if kind == "response.output_item.done":
+            item = event.get("item")
+            if isinstance(item, dict):
+                state.inflight_response_items.append(copy.deepcopy(item))
+            return
+        if kind == "response.completed":
+            response = event.get("response")
+            response_id = None
+            # Default to the items collected from output_item.done events.
+            response_items: List[Dict[str, Any]] = copy.deepcopy(state.inflight_response_items)
+            if isinstance(response, dict):
+                if isinstance(response.get("id"), str):
+                    response_id = response.get("id")
+                output = response.get("output")
+                # A non-empty output list on the final event replaces them.
+                if isinstance(output, list) and output:
+                    response_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)]
+            if not response_id:
+                # Fall back to the id captured from response.created.
+                response_id = state.inflight_response_id
+            if state.inflight_track_result and state.inflight_request_payload is not None and response_id:
+                state.last_request_payload = copy.deepcopy(state.inflight_request_payload)
+                state.last_response_id = response_id
+                state.last_response_items = _conversation_output_items(response_items)
+            else:
+                # Untracked or id-less completion: the cached exchange is dropped.
+                state.last_request_payload = None
+                state.last_response_id = None
+                state.last_response_items = []
+            _clear_inflight(state)
+            return
+        if kind in ("response.failed", "error"):
+            _clear_reuse_state(state)
+def note_responses_final_response(session_id: str, response_obj: Dict[str, Any]) -> None:
+    if not isinstance(session_id, str) or not session_id.strip():
+        return
+    if not isinstance(response_obj, dict):
+        return
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+        response_id = response_obj.get("id") if isinstance(response_obj.get("id"), str) else None
+        output = response_obj.get("output")
+        output_items = [copy.deepcopy(item) for item in output if isinstance(item, dict)] if isinstance(output, list) else []
+        if state.inflight_track_result and state.inflight_request_payload is not None and response_id:
+            state.last_request_payload = copy.deepcopy(state.inflight_request_payload)
+            state.last_response_id = response_id
+            state.last_response_items = _conversation_output_items(output_items)
+        else:
+            state.last_request_payload = None
+            state.last_response_id = None
+            state.last_response_items = []
+        _clear_inflight(state)
+def clear_responses_reuse_state(session_id: str) -> None:
+    if not isinstance(session_id, str) or not session_id.strip():
+        return
+    with _LOCK:
+        state = _RESPONSES_SESSION_STATE.get(session_id)
+        if state is None:
+            return
+        _clear_reuse_state(state)
+def reset_session_state() -> None:
+    with _LOCK:
+        _FINGERPRINT_TO_UUID.clear()
+        _ORDER.clear()
+        _RESPONSES_SESSION_STATE.clear()
+        _RESPONSES_ORDER.clear()

build/lib/chatmock/transform.py ADDED Viewed

	@@ -0,0 +1,149 @@

+from __future__ import annotations
+import json
+from typing import Any, Dict, List
+def to_data_url(image_str: str) -> str:
+    if not isinstance(image_str, str) or not image_str:
+        return image_str
+    s = image_str.strip()
+    if s.startswith("data:image/"):
+        return s
+    if s.startswith("http://") or s.startswith("https://"):
+        return s
+    b64 = s.replace("\n", "").replace("\r", "")
+    kind = "image/png"
+    if b64.startswith("/9j/"):
+        kind = "image/jpeg"
+    elif b64.startswith("iVBORw0KGgo"):
+        kind = "image/png"
+    elif b64.startswith("R0lGOD"):
+        kind = "image/gif"
+    return f"data:{kind};base64,{b64}"
+def convert_ollama_messages(
+    messages: List[Dict[str, Any]] | None, top_images: List[str] | None
+) -> List[Dict[str, Any]]:
+    out: List[Dict[str, Any]] = []
+    msgs = messages if isinstance(messages, list) else []
+    pending_call_ids: List[str] = []
+    call_counter = 0
+    for m in msgs:
+        if not isinstance(m, dict):
+            continue
+        role = m.get("role") or "user"
+        nm: Dict[str, Any] = {"role": role}
+        content = m.get("content")
+        images = m.get("images") if isinstance(m.get("images"), list) else []
+        parts: List[Dict[str, Any]] = []
+        if isinstance(content, list):
+            for p in content:
+                if isinstance(p, dict) and p.get("type") == "text" and isinstance(p.get("text"), str):
+                    parts.append({"type": "text", "text": p.get("text")})
+        elif isinstance(content, str):
+            parts.append({"type": "text", "text": content})
+        for img in images:
+            url = to_data_url(img)
+            if isinstance(url, str) and url:
+                parts.append({"type": "image_url", "image_url": {"url": url}})
+        if parts:
+            nm["content"] = parts
+        if role == "assistant" and isinstance(m.get("tool_calls"), list):
+            tcs = []
+            for tc in m.get("tool_calls"):
+                if not isinstance(tc, dict):
+                    continue
+                fn = tc.get("function") if isinstance(tc.get("function"), dict) else {}
+                name = fn.get("name") if isinstance(fn.get("name"), str) else None
+                args = fn.get("arguments")
+                if name is None:
+                    continue
+                call_id = tc.get("id") or tc.get("call_id")
+                if not isinstance(call_id, str) or not call_id:
+                    call_counter += 1
+                    call_id = f"ollama_call_{call_counter}"
+                pending_call_ids.append(call_id)
+                tcs.append(
+                    {
+                        "id": call_id,
+                        "type": "function",
+                        "function": {
+                            "name": name,
+                            "arguments": args if isinstance(args, str) else (json.dumps(args) if isinstance(args, dict) else "{}"),
+                        },
+                    }
+                )
+            if tcs:
+                nm["tool_calls"] = tcs
+        if role == "tool":
+            tci = m.get("tool_call_id") or m.get("id")
+            if not isinstance(tci, str) or not tci:
+                if pending_call_ids:
+                    tci = pending_call_ids.pop(0)
+            if isinstance(tci, str) and tci:
+                nm["tool_call_id"] = tci
+            if not parts and isinstance(content, str):
+                nm["content"] = content
+        out.append(nm)
+    if isinstance(top_images, list) and top_images:
+        attach_to = None
+        for i in range(len(out) - 1, -1, -1):
+            if out[i].get("role") == "user":
+                attach_to = out[i]
+                break
+        if attach_to is None:
+            attach_to = {"role": "user", "content": []}
+            out.append(attach_to)
+        attach_to.setdefault("content", [])
+        for img in top_images:
+            url = to_data_url(img)
+            if isinstance(url, str) and url:
+                attach_to["content"].append({"type": "image_url", "image_url": {"url": url}})
+    return out
+def normalize_ollama_tools(tools: List[Dict[str, Any]] | None) -> List[Dict[str, Any]]:
+    out: List[Dict[str, Any]] = []
+    if not isinstance(tools, list):
+        return out
+    for t in tools:
+        if not isinstance(t, dict):
+            continue
+        if isinstance(t.get("function"), dict):
+            fn = t.get("function")
+            name = fn.get("name") if isinstance(fn.get("name"), str) else None
+            if not name:
+                continue
+            out.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "description": fn.get("description") or "",
+                        "parameters": fn.get("parameters") if isinstance(fn.get("parameters"), dict) else {"type": "object", "properties": {}},
+                    },
+                }
+            )
+            continue
+        name = t.get("name") if isinstance(t.get("name"), str) else None
+        if name:
+            out.append(
+                {
+                    "type": "function",
+                    "function": {
+                        "name": name,
+                        "description": t.get("description") or "",
+                        "parameters": {"type": "object", "properties": {}},
+                    },
+                }
+            )
+    return out

build/lib/chatmock/upstream.py ADDED Viewed

	@@ -0,0 +1,181 @@

+from __future__ import annotations
+import json
+import time
+from typing import Any, Dict, List, Tuple
+from urllib.parse import urlparse, urlunparse
+import requests
+from flask import Response, current_app, jsonify, make_response
+from .config import CHATGPT_RESPONSES_URL
+from .http import build_cors_headers
+from .model_registry import normalize_model_name
+from .session import ensure_session_id
+from flask import request as flask_request
+from .utils import get_effective_chatgpt_auth
+def _log_json(prefix: str, payload: Any) -> None:
+    try:
+        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+    except Exception:
+        try:
+            print(f"{prefix}\n{payload}")
+        except Exception:
+            pass
+def start_upstream_request(
+    model: str,
+    input_items: List[Dict[str, Any]],
+    *,
+    instructions: str | None = None,
+    tools: List[Dict[str, Any]] | None = None,
+    tool_choice: Any | None = None,
+    parallel_tool_calls: bool = False,
+    reasoning_param: Dict[str, Any] | None = None,
+    service_tier: str | None = None,
+):
+    access_token, account_id = get_effective_chatgpt_auth()
+    if not access_token or not account_id:
+        resp = make_response(
+            jsonify(
+                {
+                    "error": {
+                        "message": "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+                    }
+                }
+            ),
+            401,
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return None, resp
+    include: List[str] = []
+    if isinstance(reasoning_param, dict):
+        include.append("reasoning.encrypted_content")
+    client_session_id = None
+    try:
+        client_session_id = (
+            flask_request.headers.get("X-Session-Id")
+            or flask_request.headers.get("session_id")
+            or None
+        )
+    except Exception:
+        client_session_id = None
+    session_id = ensure_session_id(instructions, input_items, client_session_id)
+    responses_payload = {
+        "model": model,
+        "instructions": instructions if isinstance(instructions, str) and instructions.strip() else instructions,
+        "input": input_items,
+        "tools": tools or [],
+        "tool_choice": tool_choice if tool_choice in ("auto", "none") or isinstance(tool_choice, dict) else "auto",
+        "parallel_tool_calls": bool(parallel_tool_calls),
+        "store": False,
+        "stream": True,
+        "prompt_cache_key": session_id,
+    }
+    if include:
+        responses_payload["include"] = include
+    if reasoning_param is not None:
+        responses_payload["reasoning"] = reasoning_param
+    if isinstance(service_tier, str) and service_tier.strip():
+        responses_payload["service_tier"] = service_tier.strip().lower()
+    return start_upstream_raw_request(
+        responses_payload,
+        session_id=session_id,
+        stream=True,
+    )
+def build_upstream_headers(
+    access_token: str,
+    account_id: str,
+    session_id: str,
+    *,
+    accept: str = "text/event-stream",
+) -> Dict[str, str]:
+    return {
+        "Authorization": f"Bearer {access_token}",
+        "Content-Type": "application/json",
+        "Accept": accept,
+        "chatgpt-account-id": account_id,
+        "OpenAI-Beta": "responses=experimental",
+        "session_id": session_id,
+    }
+def start_upstream_raw_request(
+    responses_payload: Dict[str, Any],
+    *,
+    session_id: str | None = None,
+    stream: bool = True,
+):
+    access_token, account_id = get_effective_chatgpt_auth()
+    if not access_token or not account_id:
+        resp = make_response(
+            jsonify(
+                {
+                    "error": {
+                        "message": "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+                    }
+                }
+            ),
+            401,
+        )
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return None, resp
+    effective_session_id = session_id
+    if not isinstance(effective_session_id, str) or not effective_session_id.strip():
+        payload_prompt_cache_key = responses_payload.get("prompt_cache_key")
+        if isinstance(payload_prompt_cache_key, str) and payload_prompt_cache_key.strip():
+            effective_session_id = payload_prompt_cache_key.strip()
+    if not isinstance(effective_session_id, str) or not effective_session_id.strip():
+        effective_session_id = str(int(time.time() * 1000))
+    verbose = False
+    try:
+        verbose = bool(current_app.config.get("VERBOSE"))
+    except Exception:
+        verbose = False
+    if verbose:
+        _log_json("OUTBOUND >> ChatGPT Responses API payload", responses_payload)
+    headers = build_upstream_headers(
+        access_token,
+        account_id,
+        effective_session_id,
+        accept=("text/event-stream" if stream else "application/json"),
+    )
+    try:
+        upstream = requests.post(
+            CHATGPT_RESPONSES_URL,
+            headers=headers,
+            json=responses_payload,
+            stream=stream,
+            timeout=600,
+        )
+    except requests.RequestException as e:
+        resp = make_response(jsonify({"error": {"message": f"Upstream ChatGPT request failed: {e}"}}), 502)
+        for k, v in build_cors_headers().items():
+            resp.headers.setdefault(k, v)
+        return None, resp
+    return upstream, None
+def build_upstream_websocket_url() -> str:
+    parsed = urlparse(CHATGPT_RESPONSES_URL)
+    scheme = parsed.scheme.lower()
+    if scheme == "https":
+        parsed = parsed._replace(scheme="wss")
+    elif scheme == "http":
+        parsed = parsed._replace(scheme="ws")
+    return urlunparse(parsed)

build/lib/chatmock/utils.py ADDED Viewed

	@@ -0,0 +1,874 @@

+from __future__ import annotations
+import base64
+import datetime
+import hashlib
+import json
+import os
+import secrets
+import sys
+from typing import Any, Dict, List, Optional, Tuple
+import requests
+from .config import CLIENT_ID_DEFAULT, OAUTH_TOKEN_URL
+def eprint(*args, **kwargs) -> None:
+    print(*args, file=sys.stderr, **kwargs)
+def get_home_dir() -> str:
+    home = os.getenv("CHATGPT_LOCAL_HOME") or os.getenv("CODEX_HOME")
+    if not home:
+        home = os.path.expanduser("~/.chatgpt-local")
+    return home
+def read_auth_file() -> Dict[str, Any] | None:
+    for base in [
+        os.getenv("CHATGPT_LOCAL_HOME"),
+        os.getenv("CODEX_HOME"),
+        os.path.expanduser("~/.chatgpt-local"),
+        os.path.expanduser("~/.codex"),
+    ]:
+        if not base:
+            continue
+        path = os.path.join(base, "auth.json")
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                return json.load(f)
+        except FileNotFoundError:
+            continue
+        except Exception:
+            continue
+    return None
+def write_auth_file(auth: Dict[str, Any]) -> bool:
+    home = get_home_dir()
+    try:
+        os.makedirs(home, exist_ok=True)
+    except Exception as exc:
+        eprint(f"ERROR: unable to create auth home directory {home}: {exc}")
+        return False
+    path = os.path.join(home, "auth.json")
+    try:
+        with open(path, "w", encoding="utf-8") as fp:
+            if hasattr(os, "fchmod"):
+                os.fchmod(fp.fileno(), 0o600)
+            json.dump(auth, fp, indent=2)
+        return True
+    except Exception as exc:
+        eprint(f"ERROR: unable to write auth file: {exc}")
+        return False
+def parse_jwt_claims(token: str) -> Dict[str, Any] | None:
+    if not token or token.count(".") != 2:
+        return None
+    try:
+        _, payload, _ = token.split(".")
+        padded = payload + "=" * (-len(payload) % 4)
+        data = base64.urlsafe_b64decode(padded.encode())
+        return json.loads(data.decode())
+    except Exception:
+        return None
+def generate_pkce() -> "PkceCodes":
+    from .models import PkceCodes
+    code_verifier = secrets.token_hex(64)
+    digest = hashlib.sha256(code_verifier.encode()).digest()
+    code_challenge = base64.urlsafe_b64encode(digest).rstrip(b"=").decode()
+    return PkceCodes(code_verifier=code_verifier, code_challenge=code_challenge)
+def convert_chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    def _normalize_image_data_url(url: str) -> str:
+        try:
+            if not isinstance(url, str):
+                return url
+            if not url.startswith("data:image/"):
+                return url
+            if ";base64," not in url:
+                return url
+            header, data = url.split(",", 1)
+            try:
+                from urllib.parse import unquote
+                data = unquote(data)
+            except Exception:
+                pass
+            data = data.strip().replace("\n", "").replace("\r", "")
+            data = data.replace("-", "+").replace("_", "/")
+            pad = (-len(data)) % 4
+            if pad:
+                data = data + ("=" * pad)
+            try:
+                base64.b64decode(data, validate=True)
+            except Exception:
+                return url
+            return f"{header},{data}"
+        except Exception:
+            return url
+    input_items: List[Dict[str, Any]] = []
+    for message in messages:
+        role = message.get("role")
+        if role == "system":
+            continue
+        if role == "tool":
+            call_id = message.get("tool_call_id") or message.get("id")
+            if isinstance(call_id, str) and call_id:
+                content = message.get("content", "")
+                if isinstance(content, list):
+                    texts = []
+                    for part in content:
+                        if isinstance(part, dict):
+                            t = part.get("text") or part.get("content")
+                            if isinstance(t, str) and t:
+                                texts.append(t)
+                    content = "\n".join(texts)
+                if isinstance(content, str):
+                    input_items.append(
+                        {
+                            "type": "function_call_output",
+                            "call_id": call_id,
+                            "output": content,
+                        }
+                    )
+            continue
+        if role == "assistant" and isinstance(message.get("tool_calls"), list):
+            for tc in message.get("tool_calls") or []:
+                if not isinstance(tc, dict):
+                    continue
+                tc_type = tc.get("type", "function")
+                if tc_type != "function":
+                    continue
+                call_id = tc.get("id") or tc.get("call_id")
+                fn = tc.get("function") if isinstance(tc.get("function"), dict) else {}
+                name = fn.get("name") if isinstance(fn, dict) else None
+                args = fn.get("arguments") if isinstance(fn, dict) else None
+                if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                    input_items.append(
+                        {
+                            "type": "function_call",
+                            "name": name,
+                            "arguments": args,
+                            "call_id": call_id,
+                        }
+                    )
+        content = message.get("content", "")
+        content_items: List[Dict[str, Any]] = []
+        if isinstance(content, list):
+            for part in content:
+                if not isinstance(part, dict):
+                    continue
+                ptype = part.get("type")
+                if ptype == "text":
+                    text = part.get("text") or part.get("content") or ""
+                    if isinstance(text, str) and text:
+                        kind = "output_text" if role == "assistant" else "input_text"
+                        content_items.append({"type": kind, "text": text})
+                elif ptype == "image_url":
+                    image = part.get("image_url")
+                    url = image.get("url") if isinstance(image, dict) else image
+                    if isinstance(url, str) and url:
+                        content_items.append({"type": "input_image", "image_url": _normalize_image_data_url(url)})
+        elif isinstance(content, str) and content:
+            kind = "output_text" if role == "assistant" else "input_text"
+            content_items.append({"type": kind, "text": content})
+        if not content_items:
+            continue
+        role_out = "assistant" if role == "assistant" else "user"
+        input_items.append({"type": "message", "role": role_out, "content": content_items})
+    return input_items
+def convert_tools_chat_to_responses(tools: Any) -> List[Dict[str, Any]]:
+    out: List[Dict[str, Any]] = []
+    if not isinstance(tools, list):
+        return out
+    for t in tools:
+        if not isinstance(t, dict):
+            continue
+        if t.get("type") != "function":
+            continue
+        fn = t.get("function") if isinstance(t.get("function"), dict) else {}
+        name = fn.get("name") if isinstance(fn, dict) else None
+        if not isinstance(name, str) or not name:
+            continue
+        desc = fn.get("description") if isinstance(fn, dict) else None
+        params = fn.get("parameters") if isinstance(fn, dict) else None
+        if not isinstance(params, dict):
+            params = {"type": "object", "properties": {}}
+        out.append(
+            {
+                "type": "function",
+                "name": name,
+                "description": desc or "",
+                "strict": False,
+                "parameters": params,
+            }
+        )
+    return out
+def load_chatgpt_tokens(ensure_fresh: bool = True) -> tuple[str | None, str | None, str | None]:
+    auth = read_auth_file()
+    if not isinstance(auth, dict):
+        return None, None, None
+    tokens = auth.get("tokens") if isinstance(auth.get("tokens"), dict) else {}
+    access_token: Optional[str] = tokens.get("access_token")
+    account_id: Optional[str] = tokens.get("account_id")
+    id_token: Optional[str] = tokens.get("id_token")
+    refresh_token: Optional[str] = tokens.get("refresh_token")
+    last_refresh = auth.get("last_refresh")
+    if ensure_fresh and isinstance(refresh_token, str) and refresh_token and CLIENT_ID_DEFAULT:
+        needs_refresh = _should_refresh_access_token(access_token, last_refresh)
+        if needs_refresh or not (isinstance(access_token, str) and access_token):
+            refreshed = _refresh_chatgpt_tokens(refresh_token, CLIENT_ID_DEFAULT)
+            if refreshed:
+                access_token = refreshed.get("access_token") or access_token
+                id_token = refreshed.get("id_token") or id_token
+                refresh_token = refreshed.get("refresh_token") or refresh_token
+                account_id = refreshed.get("account_id") or account_id
+                updated_tokens = dict(tokens)
+                if isinstance(access_token, str) and access_token:
+                    updated_tokens["access_token"] = access_token
+                if isinstance(id_token, str) and id_token:
+                    updated_tokens["id_token"] = id_token
+                if isinstance(refresh_token, str) and refresh_token:
+                    updated_tokens["refresh_token"] = refresh_token
+                if isinstance(account_id, str) and account_id:
+                    updated_tokens["account_id"] = account_id
+                persisted = _persist_refreshed_auth(auth, updated_tokens)
+                if persisted is not None:
+                    auth, tokens = persisted
+                else:
+                    tokens = updated_tokens
+    if not isinstance(account_id, str) or not account_id:
+        account_id = _derive_account_id(id_token)
+    access_token = access_token if isinstance(access_token, str) and access_token else None
+    id_token = id_token if isinstance(id_token, str) and id_token else None
+    account_id = account_id if isinstance(account_id, str) and account_id else None
+    return access_token, account_id, id_token
+def _should_refresh_access_token(access_token: Optional[str], last_refresh: Any) -> bool:
+    if not isinstance(access_token, str) or not access_token:
+        return True
+    claims = parse_jwt_claims(access_token) or {}
+    exp = claims.get("exp") if isinstance(claims, dict) else None
+    now = datetime.datetime.now(datetime.timezone.utc)
+    if isinstance(exp, (int, float)):
+        try:
+            expiry = datetime.datetime.fromtimestamp(float(exp), datetime.timezone.utc)
+        except (OverflowError, OSError, ValueError):
+            expiry = None
+        if expiry is not None:
+            return expiry <= now + datetime.timedelta(minutes=5)
+    if isinstance(last_refresh, str):
+        refreshed_at = _parse_iso8601(last_refresh)
+        if refreshed_at is not None:
+            return refreshed_at <= now - datetime.timedelta(minutes=55)
+    return False
+def _refresh_chatgpt_tokens(refresh_token: str, client_id: str) -> Optional[Dict[str, Optional[str]]]:
+    payload = {
+        "grant_type": "refresh_token",
+        "refresh_token": refresh_token,
+        "client_id": client_id,
+        "scope": "openid profile email offline_access",
+    }
+    try:
+        resp = requests.post(OAUTH_TOKEN_URL, json=payload, timeout=30)
+    except requests.RequestException as exc:
+        eprint(f"ERROR: failed to refresh ChatGPT token: {exc}")
+        return None
+    if resp.status_code >= 400:
+        eprint(f"ERROR: refresh token request returned status {resp.status_code}")
+        return None
+    try:
+        data = resp.json()
+    except ValueError as exc:
+        eprint(f"ERROR: unable to parse refresh token response: {exc}")
+        return None
+    id_token = data.get("id_token")
+    access_token = data.get("access_token")
+    new_refresh_token = data.get("refresh_token") or refresh_token
+    if not isinstance(id_token, str) or not isinstance(access_token, str):
+        eprint("ERROR: refresh token response missing expected tokens")
+        return None
+    account_id = _derive_account_id(id_token)
+    new_refresh_token = new_refresh_token if isinstance(new_refresh_token, str) and new_refresh_token else refresh_token
+    return {
+        "id_token": id_token,
+        "access_token": access_token,
+        "refresh_token": new_refresh_token,
+        "account_id": account_id,
+    }
+def _persist_refreshed_auth(auth: Dict[str, Any], updated_tokens: Dict[str, Any]) -> Optional[Tuple[Dict[str, Any], Dict[str, Any]]]:
+    updated_auth = dict(auth)
+    updated_auth["tokens"] = updated_tokens
+    updated_auth["last_refresh"] = _now_iso8601()
+    if write_auth_file(updated_auth):
+        return updated_auth, updated_tokens
+    eprint("ERROR: unable to persist refreshed auth tokens")
+    return None
+def _derive_account_id(id_token: Optional[str]) -> Optional[str]:
+    if not isinstance(id_token, str) or not id_token:
+        return None
+    claims = parse_jwt_claims(id_token) or {}
+    auth_claims = claims.get("https://api.openai.com/auth") if isinstance(claims, dict) else None
+    if isinstance(auth_claims, dict):
+        account_id = auth_claims.get("chatgpt_account_id")
+        if isinstance(account_id, str) and account_id:
+            return account_id
+    return None
+def _parse_iso8601(value: str) -> Optional[datetime.datetime]:
+    try:
+        if value.endswith("Z"):
+            value = value[:-1] + "+00:00"
+        dt = datetime.datetime.fromisoformat(value)
+        if dt.tzinfo is None:
+            dt = dt.replace(tzinfo=datetime.timezone.utc)
+        return dt.astimezone(datetime.timezone.utc)
+    except Exception:
+        return None
+def _now_iso8601() -> str:
+    return datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")
+def get_effective_chatgpt_auth() -> tuple[str | None, str | None]:
+    access_token, account_id, id_token = load_chatgpt_tokens()
+    if not account_id:
+        account_id = _derive_account_id(id_token)
+    return access_token, account_id
+def sse_translate_chat(
+    upstream,
+    model: str,
+    created: int,
+    verbose: bool = False,
+    vlog=None,
+    reasoning_compat: str = "think-tags",
+    *,
+    include_usage: bool = False,
+):
+    response_id = "chatcmpl-stream"
+    compat = (reasoning_compat or "think-tags").strip().lower()
+    think_open = False
+    think_closed = False
+    saw_output = False
+    sent_stop_chunk = False
+    saw_any_summary = False
+    pending_summary_paragraph = False
+    upstream_usage = None
+    ws_state: dict[str, Any] = {}
+    ws_index: dict[str, int] = {}
+    ws_next_index: int = 0
+    def _serialize_tool_args(eff_args: Any) -> str:
+        """
+        Serialize tool call arguments with proper JSON handling.
+        Args:
+            eff_args: Arguments to serialize (dict, list, str, or other)
+        Returns:
+            JSON string representation of the arguments
+        """
+        if isinstance(eff_args, (dict, list)):
+            return json.dumps(eff_args)
+        elif isinstance(eff_args, str):
+            try:
+                parsed = json.loads(eff_args)
+                if isinstance(parsed, (dict, list)):
+                    return json.dumps(parsed)
+                else:
+                    return json.dumps({"query": eff_args})
+            except (json.JSONDecodeError, ValueError):
+                return json.dumps({"query": eff_args})
+        else:
+            return "{}"
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        try:
+            line_iterator = upstream.iter_lines(decode_unicode=False)
+        except requests.exceptions.ChunkedEncodingError as e:
+            if verbose and vlog:
+                vlog(f"Failed to start stream: {e}")
+            yield b"data: [DONE]\n\n"
+            return
+        for raw in line_iterator:
+            try:
+                if not raw:
+                    continue
+                line = (
+                    raw.decode("utf-8", errors="ignore")
+                    if isinstance(raw, (bytes, bytearray))
+                    else raw
+                )
+                if verbose and vlog:
+                    vlog(line)
+                if not line.startswith("data: "):
+                    continue
+                data = line[len("data: ") :].strip()
+                if not data:
+                    continue
+                if data == "[DONE]":
+                    break
+                try:
+                    evt = json.loads(data)
+                except (json.JSONDecodeError, UnicodeDecodeError):
+                    continue
+            except (
+                requests.exceptions.ChunkedEncodingError,
+                ConnectionError,
+                BrokenPipeError,
+            ) as e:
+                # Connection interrupted mid-stream - end gracefully
+                if verbose and vlog:
+                    vlog(f"Stream interrupted: {e}")
+                yield b"data: [DONE]\n\n"
+                return
+            kind = evt.get("type")
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            if isinstance(kind, str) and ("web_search_call" in kind):
+                try:
+                    call_id = evt.get("item_id") or "ws_call"
+                    if verbose and vlog:
+                        try:
+                            vlog(f"CM_TOOLS {kind} id={call_id} -> tool_calls(web_search)")
+                        except Exception:
+                            pass
+                    item = evt.get('item') if isinstance(evt.get('item'), dict) else {}
+                    params_dict = ws_state.setdefault(call_id, {}) if isinstance(ws_state.get(call_id), dict) else {}
+                    def _merge_from(src):
+                        if not isinstance(src, dict):
+                            return
+                        for whole in ('parameters','args','arguments','input'):
+                            if isinstance(src.get(whole), dict):
+                                params_dict.update(src.get(whole))
+                        if isinstance(src.get('query'), str): params_dict.setdefault('query', src.get('query'))
+                        if isinstance(src.get('q'), str): params_dict.setdefault('query', src.get('q'))
+                        for rk in ('recency','time_range','days'):
+                            if src.get(rk) is not None and rk not in params_dict: params_dict[rk] = src.get(rk)
+                        for dk in ('domains','include_domains','include'):
+                            if isinstance(src.get(dk), list) and 'domains' not in params_dict: params_dict['domains'] = src.get(dk)
+                        for mk in ('max_results','topn','limit'):
+                            if src.get(mk) is not None and 'max_results' not in params_dict: params_dict['max_results'] = src.get(mk)
+                    _merge_from(item)
+                    _merge_from(evt if isinstance(evt, dict) else None)
+                    params = params_dict if params_dict else None
+                    if isinstance(params, dict):
+                        try:
+                            ws_state.setdefault(call_id, {}).update(params)
+                        except Exception:
+                            pass
+                    eff_params = ws_state.get(call_id, params if isinstance(params, (dict, list, str)) else {})
+                    args_str = _serialize_tool_args(eff_params)
+                    if call_id not in ws_index:
+                        ws_index[call_id] = ws_next_index
+                        ws_next_index += 1
+                    _idx = ws_index.get(call_id, 0)
+                    delta_chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {
+                                    "tool_calls": [
+                                        {
+                                            "index": _idx,
+                                            "id": call_id,
+                                            "type": "function",
+                                            "function": {"name": "web_search", "arguments": args_str},
+                                        }
+                                    ]
+                                },
+                                "finish_reason": None,
+                            }
+                        ],
+                    }
+                    yield f"data: {json.dumps(delta_chunk)}\n\n".encode("utf-8")
+                    if kind.endswith(".completed") or kind.endswith(".done"):
+                        finish_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {"index": 0, "delta": {}, "finish_reason": "tool_calls"}
+                            ],
+                        }
+                        yield f"data: {json.dumps(finish_chunk)}\n\n".encode("utf-8")
+                except Exception:
+                    pass
+            if kind == "response.output_text.delta":
+                delta = evt.get("delta") or ""
+                if compat == "think-tags" and think_open and not think_closed:
+                    close_chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
+                    }
+                    yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8")
+                    think_open = False
+                    think_closed = True
+                saw_output = True
+                chunk = {
+                    "id": response_id,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "delta": {"content": delta}, "finish_reason": None}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.output_item.done":
+                item = evt.get("item") or {}
+                if isinstance(item, dict) and (item.get("type") == "function_call" or item.get("type") == "web_search_call"):
+                    call_id = item.get("call_id") or item.get("id") or ""
+                    name = item.get("name") or ("web_search" if item.get("type") == "web_search_call" else "")
+                    raw_args = item.get("arguments") or item.get("parameters")
+                    if isinstance(raw_args, dict):
+                        try:
+                            ws_state.setdefault(call_id, {}).update(raw_args)
+                        except Exception:
+                            pass
+                    eff_args = ws_state.get(call_id, raw_args if isinstance(raw_args, (dict, list, str)) else {})
+                    try:
+                        args = _serialize_tool_args(eff_args)
+                    except Exception:
+                        args = "{}"
+                    if item.get("type") == "web_search_call" and verbose and vlog:
+                        try:
+                            vlog(f"CM_TOOLS response.output_item.done web_search_call id={call_id} has_args={bool(args)}")
+                        except Exception:
+                            pass
+                    if call_id not in ws_index:
+                        ws_index[call_id] = ws_next_index
+                        ws_next_index += 1
+                    _idx = ws_index.get(call_id, 0)
+                    if isinstance(call_id, str) and isinstance(name, str) and isinstance(args, str):
+                        delta_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {
+                                        "tool_calls": [
+                                            {
+                                                "index": _idx,
+                                                "id": call_id,
+                                                "type": "function",
+                                                "function": {"name": name, "arguments": args},
+                                            }
+                                        ]
+                                    },
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {json.dumps(delta_chunk)}\n\n".encode("utf-8")
+                        finish_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {}, "finish_reason": "tool_calls"}],
+                        }
+                        yield f"data: {json.dumps(finish_chunk)}\n\n".encode("utf-8")
+            elif kind == "response.reasoning_summary_part.added":
+                if compat in ("think-tags", "o3"):
+                    if saw_any_summary:
+                        pending_summary_paragraph = True
+                    else:
+                        saw_any_summary = True
+            elif kind in ("response.reasoning_summary_text.delta", "response.reasoning_text.delta"):
+                delta_txt = evt.get("delta") or ""
+                if compat == "o3":
+                    if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                        nl_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {"reasoning": {"content": [{"type": "text", "text": "\n"}]}},
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {json.dumps(nl_chunk)}\n\n".encode("utf-8")
+                        pending_summary_paragraph = False
+                    chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [
+                            {
+                                "index": 0,
+                                "delta": {"reasoning": {"content": [{"type": "text", "text": delta_txt}]}},
+                                "finish_reason": None,
+                            }
+                        ],
+                    }
+                    yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                elif compat == "think-tags":
+                    if not think_open and not think_closed:
+                        open_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {"content": "<think>"}, "finish_reason": None}],
+                        }
+                        yield f"data: {json.dumps(open_chunk)}\n\n".encode("utf-8")
+                        think_open = True
+                    if think_open and not think_closed:
+                        if kind == "response.reasoning_summary_text.delta" and pending_summary_paragraph:
+                            nl_chunk = {
+                                "id": response_id,
+                                "object": "chat.completion.chunk",
+                                "created": created,
+                                "model": model,
+                                "choices": [{"index": 0, "delta": {"content": "\n"}, "finish_reason": None}],
+                            }
+                            yield f"data: {json.dumps(nl_chunk)}\n\n".encode("utf-8")
+                            pending_summary_paragraph = False
+                        content_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {"content": delta_txt}, "finish_reason": None}],
+                        }
+                        yield f"data: {json.dumps(content_chunk)}\n\n".encode("utf-8")
+                else:
+                    if kind == "response.reasoning_summary_text.delta":
+                        chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "delta": {"reasoning_summary": delta_txt, "reasoning": delta_txt},
+                                    "finish_reason": None,
+                                }
+                            ],
+                        }
+                        yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                    else:
+                        chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [
+                                {"index": 0, "delta": {"reasoning": delta_txt}, "finish_reason": None}
+                            ],
+                        }
+                        yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif isinstance(kind, str) and kind.endswith(".done"):
+                pass
+            elif kind == "response.output_text.done":
+                chunk = {
+                    "id": response_id,
+                    "object": "chat.completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                sent_stop_chunk = True
+            elif kind == "response.failed":
+                err = evt.get("response", {}).get("error", {}).get("message", "response.failed")
+                chunk = {"error": {"message": err}}
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.completed":
+                m = _extract_usage(evt)
+                if m:
+                    upstream_usage = m
+                if compat == "think-tags" and think_open and not think_closed:
+                    close_chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "delta": {"content": "</think>"}, "finish_reason": None}],
+                    }
+                    yield f"data: {json.dumps(close_chunk)}\n\n".encode("utf-8")
+                    think_open = False
+                    think_closed = True
+                if not sent_stop_chunk:
+                    chunk = {
+                        "id": response_id,
+                        "object": "chat.completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                    }
+                    yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                    sent_stop_chunk = True
+                if include_usage and upstream_usage:
+                    try:
+                        usage_chunk = {
+                            "id": response_id,
+                            "object": "chat.completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "delta": {}, "finish_reason": None}],
+                            "usage": upstream_usage,
+                        }
+                        yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
+                    except Exception:
+                        pass
+                yield b"data: [DONE]\n\n"
+                break
+    finally:
+        upstream.close()
+def sse_translate_text(upstream, model: str, created: int, verbose: bool = False, vlog=None, *, include_usage: bool = False):
+    response_id = "cmpl-stream"
+    upstream_usage = None
+    def _extract_usage(evt: Dict[str, Any]) -> Dict[str, int] | None:
+        try:
+            usage = (evt.get("response") or {}).get("usage")
+            if not isinstance(usage, dict):
+                return None
+            pt = int(usage.get("input_tokens") or 0)
+            ct = int(usage.get("output_tokens") or 0)
+            tt = int(usage.get("total_tokens") or (pt + ct))
+            return {"prompt_tokens": pt, "completion_tokens": ct, "total_tokens": tt}
+        except Exception:
+            return None
+    try:
+        for raw_line in upstream.iter_lines(decode_unicode=False):
+            if not raw_line:
+                continue
+            line = raw_line.decode("utf-8", errors="ignore") if isinstance(raw_line, (bytes, bytearray)) else raw_line
+            if verbose and vlog:
+                vlog(line)
+            if not line.startswith("data: "):
+                continue
+            data = line[len("data: "):].strip()
+            if not data or data == "[DONE]":
+                if data == "[DONE]":
+                    chunk = {
+                        "id": response_id,
+                        "object": "text_completion.chunk",
+                        "created": created,
+                        "model": model,
+                        "choices": [{"index": 0, "text": "", "finish_reason": "stop"}],
+                    }
+                    yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+                continue
+            try:
+                evt = json.loads(data)
+            except Exception:
+                continue
+            kind = evt.get("type")
+            if isinstance(evt.get("response"), dict) and isinstance(evt["response"].get("id"), str):
+                response_id = evt["response"].get("id") or response_id
+            if kind == "response.output_text.delta":
+                delta_text = evt.get("delta") or ""
+                chunk = {
+                    "id": response_id,
+                    "object": "text_completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "text": delta_text, "finish_reason": None}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.output_text.done":
+                chunk = {
+                    "id": response_id,
+                    "object": "text_completion.chunk",
+                    "created": created,
+                    "model": model,
+                    "choices": [{"index": 0, "text": "", "finish_reason": "stop"}],
+                }
+                yield f"data: {json.dumps(chunk)}\n\n".encode("utf-8")
+            elif kind == "response.completed":
+                m = _extract_usage(evt)
+                if m:
+                    upstream_usage = m
+                if include_usage and upstream_usage:
+                    try:
+                        usage_chunk = {
+                            "id": response_id,
+                            "object": "text_completion.chunk",
+                            "created": created,
+                            "model": model,
+                            "choices": [{"index": 0, "text": "", "finish_reason": None}],
+                            "usage": upstream_usage,
+                        }
+                        yield f"data: {json.dumps(usage_chunk)}\n\n".encode("utf-8")
+                    except Exception:
+                        pass
+                yield b"data: [DONE]\n\n"
+                break
+    finally:
+        upstream.close()

build/lib/chatmock/version.py ADDED Viewed

	@@ -0,0 +1,4 @@


1	+ from __future__ import annotations
2	+
3	+
4	+ __version__ = "1.37"

build/lib/chatmock/websocket_routes.py ADDED Viewed

	@@ -0,0 +1,225 @@

+from __future__ import annotations
+import json
+import os
+import ssl
+from typing import Any, Dict
+import certifi
+from flask import current_app, request
+from flask_sock import Sock
+from websockets.sync.client import connect as websocket_connect
+from websockets.exceptions import ConnectionClosed
+from .responses_api import (
+    ResponsesRequestError,
+    extract_client_session_id,
+    normalize_responses_payload,
+)
+from .session import (
+    clear_responses_reuse_state,
+    note_responses_stream_event,
+    prepare_responses_request_for_session,
+)
+from .upstream import build_upstream_headers, build_upstream_websocket_url
+from .utils import get_effective_chatgpt_auth
+def _log_json(prefix: str, payload: Any) -> None:
+    try:
+        print(f"{prefix}\n{json.dumps(payload, indent=2, ensure_ascii=False)}")
+    except Exception:
+        try:
+            print(f"{prefix}\n{payload}")
+        except Exception:
+            pass
+def _error_event(message: str, *, status_code: int = 400, code: str | None = None) -> Dict[str, Any]:
+    error: Dict[str, Any] = {"message": message}
+    if code:
+        error["code"] = code
+    return {"type": "error", "status_code": status_code, "error": error}
+def _is_terminal_event(event: Any) -> bool:
+    if not isinstance(event, dict):
+        return False
+    kind = event.get("type")
+    return kind in ("response.completed", "response.failed", "error")
+def _build_websocket_ssl_context() -> ssl.SSLContext:
+    cafile = (
+        os.getenv("CODEX_CA_CERTIFICATE")
+        or os.getenv("SSL_CERT_FILE")
+        or certifi.where()
+    )
+    return ssl.create_default_context(cafile=cafile)
+def connect_upstream_websocket(url: str, headers: Dict[str, str]):
+    return websocket_connect(
+        url,
+        additional_headers=headers,
+        open_timeout=15,
+        ssl=_build_websocket_ssl_context(),
+    )
+def register_websocket_routes(sock: Sock) -> None:
+    """Register the ``/v1/responses`` websocket route on ``sock``.
+    The handler relays JSON frames from the client to an upstream websocket and
+    streams every upstream message back until a terminal event is seen.
+    """
+    @sock.route("/v1/responses")
+    def responses_websocket(ws) -> None:
+        """Serve one client websocket: validate frames, forward them upstream, relay replies."""
+        verbose = bool(current_app.config.get("VERBOSE"))
+        # Current upstream connection (None until the first request, or after a failure).
+        upstream_ws = None
+        # Session id the current upstream connection was opened with.
+        upstream_session_id: str | None = None
+        # Session id of the most recent response.create; used for reuse-state bookkeeping.
+        active_session_id: str | None = None
+        def _send_error(message: str, *, status_code: int = 400, code: str | None = None) -> None:
+            """Send an error event to the client; failures while sending are ignored."""
+            evt = _error_event(message, status_code=status_code, code=code)
+            if verbose:
+                _log_json("STREAM OUT WS /v1/responses (error)", evt)
+            try:
+                ws.send(json.dumps(evt))
+            except Exception:
+                pass
+        try:
+            # Outer loop: one iteration per frame received from the client.
+            while True:
+                incoming = ws.receive()
+                # NOTE(review): None presumably signals that the client is gone — confirm against flask-sock.
+                if incoming is None:
+                    break
+                if isinstance(incoming, bytes):
+                    incoming_text = incoming.decode("utf-8", errors="ignore")
+                else:
+                    incoming_text = str(incoming)
+                if verbose:
+                    print("IN WS /v1/responses\n" + incoming_text)
+                try:
+                    payload = json.loads(incoming_text)
+                except Exception:
+                    _send_error("Websocket frames must be valid JSON objects.", status_code=400)
+                    break
+                if not isinstance(payload, dict):
+                    _send_error("Websocket frames must be JSON objects.", status_code=400)
+                    break
+                client_session_id = extract_client_session_id(request.headers)
+                # Non-create frames are forwarded verbatim on the existing session.
+                outbound_text = incoming_text
+                session_id = upstream_session_id
+                if payload.get("type") == "response.create":
+                    try:
+                        normalized = normalize_responses_payload(
+                            payload,
+                            config=current_app.config,
+                            client_session_id=client_session_id,
+                        )
+                    except ResponsesRequestError as exc:
+                        # A rejected request keeps the socket open so the client can retry.
+                        _send_error(str(exc), status_code=exc.status_code, code=exc.code)
+                        continue
+                    if normalized.service_tier_resolution.warning_message and verbose:
+                        print(f"[FastMode] {normalized.service_tier_resolution.warning_message}")
+                    prepared = prepare_responses_request_for_session(
+                        normalized.session_id,
+                        normalized.payload,
+                        allow_previous_response_id=True,
+                    )
+                    # The prepared payload, not the raw client frame, is what goes upstream.
+                    outbound_text = json.dumps(prepared.payload)
+                    session_id = normalized.session_id
+                    active_session_id = normalized.session_id
+                    if verbose:
+                        _log_json("OUTBOUND >> ChatGPT Responses WS payload", prepared.payload)
+                elif upstream_ws is None:
+                    _send_error(
+                        "The first websocket message must be a response.create request.",
+                        status_code=400,
+                    )
+                    break
+                # (Re)connect when there is no upstream yet or the request's session changed.
+                if upstream_ws is None or (session_id and session_id != upstream_session_id):
+                    access_token, account_id = get_effective_chatgpt_auth()
+                    if not access_token or not account_id:
+                        if session_id:
+                            clear_responses_reuse_state(session_id)
+                        _send_error(
+                            "Missing ChatGPT credentials. Run 'python3 chatmock.py login' first.",
+                            status_code=401,
+                        )
+                        break
+                    # Close the connection of the previous session before opening a new one.
+                    if upstream_ws is not None:
+                        try:
+                            upstream_ws.close()
+                        except Exception:
+                            pass
+                    effective_session_id = session_id or client_session_id or ""
+                    try:
+                        upstream_ws = connect_upstream_websocket(
+                            build_upstream_websocket_url(),
+                            build_upstream_headers(
+                                access_token,
+                                account_id,
+                                effective_session_id,
+                                accept="application/json",
+                            ),
+                        )
+                    except Exception as exc:
+                        if session_id:
+                            clear_responses_reuse_state(session_id)
+                        _send_error(
+                            f"Upstream websocket connection failed: {exc}",
+                            status_code=502,
+                        )
+                        break
+                    upstream_session_id = effective_session_id
+                upstream_ws.send(outbound_text)
+                # Inner loop: relay upstream messages until a terminal event arrives.
+                while True:
+                    try:
+                        upstream_message = upstream_ws.recv()
+                    except ConnectionClosed:
+                        if active_session_id:
+                            clear_responses_reuse_state(active_session_id)
+                        _send_error("Upstream websocket closed unexpectedly.", status_code=502)
+                        return
+                    if upstream_message is None:
+                        if active_session_id:
+                            clear_responses_reuse_state(active_session_id)
+                        _send_error("Upstream websocket closed unexpectedly.", status_code=502)
+                        return
+                    if verbose:
+                        try:
+                            print("STREAM OUT WS /v1/responses\n" + str(upstream_message))
+                        except Exception:
+                            pass
+                    # The message is forwarded unmodified; parsing below is only for bookkeeping.
+                    ws.send(upstream_message)
+                    try:
+                        parsed = json.loads(upstream_message)
+                    except Exception:
+                        parsed = None
+                    if isinstance(parsed, dict) and active_session_id:
+                        note_responses_stream_event(active_session_id, parsed)
+                    if _is_terminal_event(parsed):
+                        # On failure or error, drop the upstream connection so the next request reconnects.
+                        if isinstance(parsed, dict) and parsed.get("type") in ("response.failed", "error"):
+                            if upstream_ws is not None:
+                                try:
+                                    upstream_ws.close()
+                                except Exception:
+                                    pass
+                            upstream_ws = None
+                            upstream_session_id = None
+                        break
+        finally:
+            # Always release the upstream connection when the handler exits.
+            if upstream_ws is not None:
+                try:
+                    upstream_ws.close()
+                except Exception:
+                    pass

chatmock.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,200 @@

+Metadata-Version: 2.4
+Name: chatmock
+Version: 1.37
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: blinker==1.9.0
+Requires-Dist: certifi==2025.8.3
+Requires-Dist: flask==3.1.1
+Requires-Dist: flask-sock==0.7.0
+Requires-Dist: idna==3.10
+Requires-Dist: itsdangerous==2.2.0
+Requires-Dist: jinja2==3.1.6
+Requires-Dist: markupsafe==3.0.2
+Requires-Dist: requests==2.32.5
+Requires-Dist: urllib3==2.5.0
+Requires-Dist: websockets==15.0.1
+Requires-Dist: werkzeug==3.1.3
+Provides-Extra: gui
+Requires-Dist: Pillow==11.3.0; extra == "gui"
+Requires-Dist: PyInstaller==6.16.0; extra == "gui"
+Requires-Dist: PySide6==6.9.2; extra == "gui"
+Dynamic: license-file
+<div align="center">
+# ChatMock
+**Allows Codex to work in your favourite chat apps and coding tools.**
+[![PyPI](https://img.shields.io/pypi/v/chatmock?color=blue&label=pypi)](https://pypi.org/project/chatmock/)
+[![Python](https://img.shields.io/pypi/pyversions/chatmock)](https://pypi.org/project/chatmock/)
+[![License](https://img.shields.io/github/license/RayBytes/ChatMock)](LICENSE)
+[![Stars](https://img.shields.io/github/stars/RayBytes/ChatMock?style=flat)](https://github.com/RayBytes/ChatMock/stargazers)
+[![Last Commit](https://img.shields.io/github/last-commit/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/commits/main)
+[![Issues](https://img.shields.io/github/issues/RayBytes/ChatMock)](https://github.com/RayBytes/ChatMock/issues)
+<br>
+</div>
+<br>
+## Install
+#### Homebrew
+```bash
+brew tap RayBytes/chatmock
+brew install chatmock
+```
+#### pipx / pip
+```bash
+pipx install chatmock
+```
+#### GUI
+Download from [releases](https://github.com/RayBytes/ChatMock/releases) (macOS & Windows)
+#### Docker
+See [DOCKER.md](DOCKER.md)
+<br>
+## Getting Started
+```bash
+# 1. Sign in with your ChatGPT account
+chatmock login
+# 2. Start the server
+chatmock serve
+```
+The server runs at `http://127.0.0.1:8000` by default. Use `http://127.0.0.1:8000/v1` as your base URL for OpenAI-compatible apps.
+<br>
+## Usage
+<details open>
+<summary><b>Python</b></summary>
+```python
+from openai import OpenAI
+client = OpenAI(
+    base_url="http://127.0.0.1:8000/v1",
+    api_key="anything"  # not checked
+)
+response = client.chat.completions.create(
+    model="gpt-5.4",
+    messages=[{"role": "user", "content": "hello"}]
+)
+print(response.choices[0].message.content)
+```
+</details>
+<details>
+<summary><b>cURL</b></summary>
+```bash
+curl http://127.0.0.1:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "gpt-5.4",
+    "messages": [{"role": "user", "content": "hello"}]
+  }'
+```
+</details>
+<br>
+## Supported Models
+- `gpt-5.4`
+- `gpt-5.4-mini`
+- `gpt-5.2`
+- `gpt-5.1`
+- `gpt-5`
+- `gpt-5.3-codex`
+- `gpt-5.3-codex-spark`
+- `gpt-5.2-codex`
+- `gpt-5-codex`
+- `gpt-5.1-codex`
+- `gpt-5.1-codex-max`
+- `gpt-5.1-codex-mini`
+- `codex-mini`
+<br>
+## Features
+- Tool / function calling
+- Vision / image input
+- Thinking summaries (via think tags)
+- Configurable thinking effort
+- Fast mode for supported models
+- Web search tool
+- OpenAI-compatible `/v1/responses` (HTTP + WebSocket)
+- Ollama-compatible endpoints
+- Reasoning effort exposed as separate models (optional)
+<br>
+## Configuration
+All flags go after `chatmock serve`. These can also be set as environment variables.
+| Flag | Env var | Options | Default | Description |
+|------|---------|---------|---------|-------------|
+| `--reasoning-effort` | `CHATGPT_LOCAL_REASONING_EFFORT` | none, minimal, low, medium, high, xhigh | medium | How hard the model thinks |
+| `--reasoning-summary` | `CHATGPT_LOCAL_REASONING_SUMMARY` | auto, concise, detailed, none | auto | Thinking summary verbosity |
+| `--reasoning-compat` | `CHATGPT_LOCAL_REASONING_COMPAT` | legacy, o3, think-tags | think-tags | How reasoning is returned to the client |
+| `--fast-mode` | `CHATGPT_LOCAL_FAST_MODE` | true/false | false | Priority processing for supported models |
+| `--enable-web-search` | `CHATGPT_LOCAL_ENABLE_WEB_SEARCH` | true/false | false | Allow the model to search the web |
+| `--expose-reasoning-models` | `CHATGPT_LOCAL_EXPOSE_REASONING_MODELS` | true/false | false | List each reasoning level as its own model |
+<details>
+<summary><b>Web search in a request</b></summary>
+```json
+{
+  "model": "gpt-5.4",
+  "messages": [{"role": "user", "content": "latest news on ..."}],
+  "responses_tools": [{"type": "web_search"}],
+  "responses_tool_choice": "auto"
+}
+```
+</details>
+<details>
+<summary><b>Fast mode in a request</b></summary>
+```json
+{
+  "model": "gpt-5.4",
+  "input": "summarize this",
+  "fast_mode": true
+}
+```
+</details>
+<br>
+## Notes
+Use responsibly and at your own risk. This project is not affiliated with OpenAI.
+<br>
+## Star History
+[![Star History Chart](https://api.star-history.com/svg?repos=RayBytes/ChatMock&type=Timeline)](https://www.star-history.com/#RayBytes/ChatMock&Timeline)

chatmock.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,34 @@

+LICENSE
+README.md
+pyproject.toml
+chatmock/__init__.py
+chatmock/app.py
+chatmock/cli.py
+chatmock/config.py
+chatmock/fast_mode.py
+chatmock/http.py
+chatmock/limits.py
+chatmock/model_registry.py
+chatmock/models.py
+chatmock/oauth.py
+chatmock/prompt.md
+chatmock/prompt_gpt5_codex.md
+chatmock/reasoning.py
+chatmock/responses_api.py
+chatmock/routes_ollama.py
+chatmock/routes_openai.py
+chatmock/session.py
+chatmock/transform.py
+chatmock/upstream.py
+chatmock/utils.py
+chatmock/version.py
+chatmock/websocket_routes.py
+chatmock.egg-info/PKG-INFO
+chatmock.egg-info/SOURCES.txt
+chatmock.egg-info/dependency_links.txt
+chatmock.egg-info/entry_points.txt
+chatmock.egg-info/requires.txt
+chatmock.egg-info/top_level.txt
+tests/test_fast_mode.py
+tests/test_models.py
+tests/test_routes.py

chatmock.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

chatmock.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ [console_scripts]
2	+ chatmock = chatmock.cli:main

chatmock.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,17 @@

+blinker==1.9.0
+certifi==2025.8.3
+flask==3.1.1
+flask-sock==0.7.0
+idna==3.10
+itsdangerous==2.2.0
+jinja2==3.1.6
+markupsafe==3.0.2
+requests==2.32.5
+urllib3==2.5.0
+websockets==15.0.1
+werkzeug==3.1.3
+[gui]
+Pillow==11.3.0
+PyInstaller==6.16.0
+PySide6==6.9.2

chatmock.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ chatmock

chatmock.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from __future__ import annotations
+from chatmock.cli import main
+if __name__ == "__main__":
+    main()

chatmock/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from __future__ import annotations
+from .app import create_app
+from .cli import main
+from .version import __version__

chatmock/__pycache__/__init__.cpython-314.pyc ADDED Viewed

Binary file (341 Bytes). View file

chatmock/__pycache__/app.cpython-314.pyc ADDED Viewed

Binary file (2.83 kB). View file

chatmock/__pycache__/cli.cpython-314.pyc ADDED Viewed

Binary file (21.6 kB). View file

chatmock/__pycache__/config.cpython-314.pyc ADDED Viewed

Binary file (3.07 kB). View file

chatmock/__pycache__/fast_mode.cpython-314.pyc ADDED Viewed

Binary file (3.56 kB). View file

chatmock/__pycache__/http.cpython-314.pyc ADDED Viewed

Binary file (1.82 kB). View file

chatmock/__pycache__/limits.cpython-314.pyc ADDED Viewed

Binary file (10.9 kB). View file

chatmock/__pycache__/model_registry.cpython-314.pyc ADDED Viewed

Binary file (7.71 kB). View file

chatmock/__pycache__/models.cpython-314.pyc ADDED Viewed

Binary file (1.23 kB). View file