somratpro Claude Sonnet 4.6 committed on
Commit
be8c7bb
·
0 Parent(s):

feat: initial HuggingClip release

Browse files

Paperclip AI agent orchestration platform on Hugging Face Spaces.

- Dockerfile: node:lts-trixie-slim, builds Paperclip from source
- start.sh: orchestrate postgres → restore → proxy → sync → launch
- health-server.js: port 7861 gateway, dashboard, proxy to 3100
- paperclip-sync.py: pg_dump + tarball backup/restore via HF Dataset
- cloudflare-proxy: bypass HF Spaces outbound network blocks
- docker-compose.yml: local dev environment
- .env.example: full configuration reference
- README.md: deployment guide with quick start

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

.env.example ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # HuggingClip - Paperclip on Hugging Face Spaces
3
+ # Configuration Reference
4
+ # ============================================================================
5
+
6
+ # ============================================================================
7
+ # Hugging Face Integration (REQUIRED for backup persistence)
8
+ # ============================================================================
9
+
10
+ # Your Hugging Face API token (needed for backup/restore)
11
+ # Get it from: https://huggingface.co/settings/tokens
12
+ HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxx
13
+
14
+ # Your Hugging Face username (optional, auto-detected from token if not set)
15
+ # HF_USERNAME=your-username
16
+
17
+ # ============================================================================
18
+ # Paperclip Core Configuration
19
+ # ============================================================================
20
+
21
+ # PostgreSQL connection string
22
+ # Format: postgres://user:password@host:port/database
23
+ DATABASE_URL=postgres://postgres:paperclip@localhost:5432/paperclip
24
+
25
+ # Paperclip API port
26
+ PORT=3100
27
+
28
+ # Serve the Paperclip UI (true/false)
29
+ SERVE_UI=true
30
+
31
+ # Node.js environment (development/production)
32
+ NODE_ENV=production
33
+
34
+ # Server bind address
35
+ HOST=0.0.0.0
36
+
37
+ # Paperclip home directory for config/data storage
38
+ PAPERCLIP_HOME=/paperclip
39
+
40
+ # Deployment mode (local/authenticated)
41
+ # Use 'authenticated' for production with Better Auth
42
+ PAPERCLIP_DEPLOYMENT_MODE=authenticated
43
+
44
+ # ============================================================================
45
+ # Paperclip Agent Providers (varies by setup)
46
+ # ============================================================================
47
+
48
+ # If using Claude as an agent provider
49
+ CLAUDE_API_KEY=sk-ant-xxxxxxxxxxxxxxxxxxxxxxxxxxxx
50
+
51
+ # For other LLM providers (OpenAI, etc.)
52
+ # LLM_API_KEY=your-api-key
53
+
54
+ # Allow Claude Code to use all models (default: restricted)
55
+ OPENCODE_ALLOW_ALL_MODELS=true
56
+
57
+ # ============================================================================
58
+ # Cloudflare Proxy (Optional - for bypassing HF Spaces network blocks)
59
+ # ============================================================================
60
+
61
+ # Cloudflare API token for setting up outbound proxy
62
+ # Get it from: https://dash.cloudflare.com/profile/api-tokens
63
+ # CLOUDFLARE_WORKERS_TOKEN=xxx
64
+
65
+ # Your Cloudflare account ID
66
+ # CLOUDFLARE_ACCOUNT_ID=xxx
67
+
68
+ # ============================================================================
69
+ # Database Backup Configuration
70
+ # ============================================================================
71
+
72
+ # Sync interval in seconds (how often to backup to HF Dataset)
73
+ SYNC_INTERVAL=180
74
+
75
+ # Maximum file size for individual backups (in bytes)
76
+ # Default: 52428800 (50MB)
77
+ SYNC_MAX_FILE_BYTES=52428800
78
+
79
+ # Name of the HF Dataset to store backups
80
+ # Will be created as: {HF_USERNAME}/{BACKUP_DATASET_NAME}
81
+ BACKUP_DATASET_NAME=paperclip-backup
82
+
83
+ # ============================================================================
84
+ # Authentication & Security
85
+ # ============================================================================
86
+
87
+ # Better Auth secret for user authentication
88
+ # Generate a random secret: openssl rand -base64 32
89
+ BETTER_AUTH_SECRET=your-random-secret-here-minimum-32-characters
90
+
91
+ # Optional: Discord webhook for admin notifications
92
+ # DISCORD_WEBHOOK_URL=https://discord.com/api/webhooks/...
93
+
94
+ # ============================================================================
95
+ # Monitoring & Uptime
96
+ # ============================================================================
97
+
98
+ # Optional: Webhook URL for restart/failure alerts
99
+ # Useful for UptimeRobot or similar monitoring services
100
+ # WEBHOOK_URL=https://uptime-robot-webhook-url
101
+
102
+ # ============================================================================
103
+ # Telemetry & Privacy
104
+ # ============================================================================
105
+
106
+ # Disable Paperclip telemetry (1 = disabled)
107
+ PAPERCLIP_TELEMETRY_DISABLED=1
108
+
109
+ # Respect Do Not Track header
110
+ DO_NOT_TRACK=1
111
+
112
+ # ============================================================================
113
+ # Development-Only Variables (for local testing)
114
+ # ============================================================================
115
+
116
+ # Debug logging (set to anything to enable)
117
+ # DEBUG=1
118
+
119
+ # ============================================================================
120
+ # HuggingFace Spaces Specific
121
+ # ============================================================================
122
+
123
+ # These are automatically set by HF Spaces:
124
+ # SPACE_ID: your-space-id
125
+ # SPACE_PERSISTENT_DIRECTORY: /tmp (ephemeral, 50GB)
126
+ # SPACE_AUTHOR_NAME: your-hf-username
127
+ # SPACE_RUNTIME: docker
CHANGELOG.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [1.0.0] - 2026-04-27
9
+
10
+ ### Added
11
+
12
+ - Initial release of HuggingClip
13
+ - Paperclip AI agent orchestration platform deployment on Hugging Face Spaces
14
+ - Automatic database persistence via Hugging Face Dataset backup/restore
15
+ - Health monitoring dashboard with real-time status
16
+ - PostgreSQL backup and restore functionality
17
+ - Health check endpoint for uptime monitoring
18
+ - Reverse proxy for Paperclip API and UI
19
+ - Environment variable configuration system
20
+ - Cloudflare proxy integration for network-restricted API providers
21
+ - Docker and Docker Compose support for local development
22
+ - Graceful shutdown with data synchronization
23
+ - UptimeRobot integration for preventing Space sleep
24
+ - Comprehensive documentation and guides
25
+ - MIT License
26
+
27
+ ### Features
28
+
29
+ - **One-click Deploy**: Easy deployment to Hugging Face Spaces
30
+ - **Data Persistence**: Automatic backup and restore of database
31
+ - **Health Monitoring**: Dashboard showing service status and metrics
32
+ - **Flexible Configuration**: Environment variable-based configuration
33
+ - **Network Bypass**: Optional Cloudflare proxy for blocked domains
34
+ - **Local Development**: Docker Compose setup for testing
35
+ - **Extensible**: Based on Paperclip's plugin architecture
36
+
37
+ ### Documentation
38
+
39
+ - README.md with deployment and usage guides
40
+ - .env.example with comprehensive configuration reference
41
+ - SECURITY.md with security best practices
42
+ - CONTRIBUTING.md with contribution guidelines
43
+ - CODE_OF_CONDUCT.md with community guidelines
44
+
45
+ ## [Unreleased]
46
+
47
+ ### Planned
48
+
49
+ - Multi-Space federation support
50
+ - Backup versioning and rotation policies
51
+ - Advanced monitoring integrations (Prometheus, Grafana)
52
+ - One-click restore from specific backup snapshots
53
+ - CLI tool for local backup management
54
+ - Multi-database support (external PostgreSQL)
55
+ - Kubernetes deployment manifests
56
+ - Better Auth integration improvements
57
+ - Agent plugin marketplace
CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We pledge to make participation in our project a harassment-free experience for everyone.
6
+
7
+ ## Our Standards
8
+
9
+ **Positive behavior:**
10
+ - Using welcoming and inclusive language
11
+ - Being respectful of differing viewpoints
12
+ - Gracefully accepting constructive criticism
13
+ - Focusing on what is best for the community
14
+
15
+ **Unacceptable behavior:**
16
+ - Trolling, insulting, or derogatory comments
17
+ - Harassment in any form
18
+ - Publishing others' private information without permission
19
+
20
+ ## Enforcement
21
+
22
+ Project maintainers may remove, edit, or reject contributions that violate this Code of Conduct.
23
+
24
+ ## Scope
25
+
26
+ Applies in all project spaces and public spaces when representing the project.
27
+
28
+ ## Attribution
29
+
30
+ Adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1.
CONTRIBUTING.md ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to HuggingClip
2
+
3
+ ## How to Contribute
4
+
5
+ ### Bug Reports
6
+
7
+ 1. Check existing [issues](https://github.com/somratpro/HuggingClip/issues) first
8
+ 2. Create a new issue with:
9
+ - Clear title and description
10
+ - Steps to reproduce
11
+ - Expected vs actual behavior
12
+ - Environment details (OS, Docker version, HF Space tier)
13
+
14
+ ### Pull Requests
15
+
16
+ 1. Fork the repository
17
+ 2. Create a feature branch: `git checkout -b feature/your-feature`
18
+ 3. Make changes and test locally with `docker-compose up`
19
+ 4. Commit: `git commit -m "feat: description of change"`
20
+ 5. Push and open a Pull Request
21
+
22
+ ### Local Development Setup
23
+
24
+ ```bash
25
+ # Clone
26
+ git clone https://github.com/somratpro/HuggingClip.git
27
+ cd HuggingClip
28
+
29
+ # Configure
30
+ cp .env.example .env
31
+ # Edit .env with your HF_TOKEN and any provider keys
32
+
33
+ # Build and start
34
+ docker-compose up --build
35
+
36
+ # Test health
37
+ curl http://localhost:7861/health
38
+
39
+ # Access dashboard
40
+ open http://localhost:7861/
41
+ ```
42
+
43
+ ### File Overview
44
+
45
+ | File | Purpose |
46
+ |------|---------|
47
+ | `Dockerfile` | Container build definition |
48
+ | `start.sh` | Orchestration script (startup sequence) |
49
+ | `health-server.js` | Port 7861 gateway + dashboard |
50
+ | `paperclip-sync.py` | PostgreSQL backup/restore to HF Dataset |
51
+ | `cloudflare-proxy.js` | Outbound proxy for blocked domains |
52
+ | `cloudflare-proxy-setup.py` | Auto-provision Cloudflare Worker |
53
+ | `docker-compose.yml` | Local development setup |
54
+
55
+ ### Commit Convention
56
+
57
+ Follow [Conventional Commits](https://www.conventionalcommits.org/):
58
+
59
+ - `feat:` New features
60
+ - `fix:` Bug fixes
61
+ - `docs:` Documentation changes
62
+ - `refactor:` Code restructuring
63
+ - `chore:` Maintenance tasks
64
+
65
+ ## Questions?
66
+
67
+ Open an issue or check the [Paperclip docs](https://docs.paperclip.ing).
Dockerfile ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Stage 1: Build Paperclip from source
2
+ FROM node:lts-trixie-slim AS paperclip-builder
3
+
4
+ WORKDIR /build
5
+
6
+ RUN apt-get update && apt-get install -y \
7
+ git \
8
+ python3 \
9
+ python3-pip \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # Clone Paperclip repository
13
+ RUN git clone https://github.com/paperclipai/paperclip.git . && \
14
+ git checkout main
15
+
16
+ # Install pnpm
17
+ RUN npm install -g pnpm@9.15.2
18
+
19
+ # Install dependencies
20
+ RUN pnpm install --frozen-lockfile
21
+
22
+ # Build Paperclip
23
+ RUN pnpm build
24
+
25
+ # Stage 2: Runtime
26
+ FROM node:lts-trixie-slim
27
+
28
+ WORKDIR /app
29
+
30
+ # Install system dependencies
31
+ RUN apt-get update && apt-get install -y \
32
+ curl \
33
+ postgresql-client \
34
+ postgresql \
35
+ postgresql-contrib \
36
+ python3 \
37
+ python3-pip \
38
+ python3-venv \
39
+ git \
40
+ && rm -rf /var/lib/apt/lists/*
41
+
42
+ # Create PostgreSQL data directory
43
+ RUN mkdir -p /var/run/postgresql && chown postgres:postgres /var/run/postgresql
44
+
45
+ # Install Node dependencies for health-server
46
+ RUN npm install -g express@4.18.2 cors@2.8.5 morgan@1.10.0 uuid@9.0.1
47
+
48
+ # Install Python dependencies for sync
49
+ RUN pip install --no-cache-dir huggingface_hub==0.24.5 PyYAML==6.0.1
50
+
51
+ # Copy Paperclip build from builder
52
+ COPY --from=paperclip-builder /build/dist /app/paperclip
53
+ COPY --from=paperclip-builder /build/package.json /app/paperclip/
54
+ COPY --from=paperclip-builder /build/node_modules /app/paperclip/node_modules
55
+
56
+ # Copy orchestration files
57
+ COPY start.sh /app/
58
+ COPY health-server.js /app/
59
+ COPY paperclip-sync.py /app/
60
+ COPY cloudflare-proxy.js /app/
61
+ COPY cloudflare-proxy-setup.py /app/
62
+ COPY cloudflare-worker.js /app/
63
+ COPY setup-uptimerobot.sh /app/
64
+
65
+ # Make scripts executable
66
+ RUN chmod +x /app/start.sh /app/setup-uptimerobot.sh
67
+
68
+ # Create persistent storage directory
69
+ RUN mkdir -p /paperclip /var/lib/postgresql/data && \
70
+ chown -R postgres:postgres /var/lib/postgresql/data /paperclip
71
+
72
+ # Set secure file permissions
73
+ RUN umask 0077
74
+
75
+ EXPOSE 7861
76
+
77
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=90s --retries=3 \
78
+ CMD curl -f http://localhost:7861/health || exit 1
79
+
80
+ CMD ["/app/start.sh"]
LICENSE ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 HuggingClip Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
22
+
23
+ ---
24
+
25
+ This project builds upon:
26
+ - Paperclip: https://github.com/paperclipai/paperclip (MIT License)
27
+ - HuggingClaw: https://github.com/democra-ai/HuggingClaw (Apache 2.0)
28
+ - Hugging8n: https://github.com/somratpro/Hugging8n (MIT License)
README.md ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🔗 HuggingClip
2
+
3
+ Paperclip AI Agent Orchestration Platform running on Hugging Face Spaces.
4
+
5
+ Deploy your own instance of [Paperclip](https://paperclip.ing/) — the open-source platform for orchestrating AI agents to run autonomous businesses — on Hugging Face Spaces with automatic persistent backup to Hugging Face Datasets.
6
+
7
+ **Features:**
8
+ - ✅ Run Paperclip on HF Spaces (free tier compatible)
9
+ - ✅ Automatic database backup to HF Dataset (survives restarts)
10
+ - ✅ Health monitoring dashboard with real-time status
11
+ - ✅ One-click deploy with configuration via environment variables
12
+ - ✅ Cloudflare proxy integration (for network-restricted providers)
13
+ - ✅ Graceful shutdown and data persistence
14
+
15
+ ## Quick Start
16
+
17
+ ### 1-Click Deploy (Recommended)
18
+
19
+ [Deploy to Hugging Face Spaces](https://huggingface.co/new-space?template=somratpro/HuggingClip)
20
+
21
+ Or manually:
22
+ 1. Create a new Space on [Hugging Face](https://huggingface.co/new-space)
23
+ 2. Choose **Docker** as the runtime
24
+ 3. Copy this repository as the source
25
+ 4. Configure required secrets (see **Configuration** below)
26
+ 5. Deploy!
27
+
28
+ ### Local Development
29
+
30
+ #### Prerequisites
31
+ - Docker & Docker Compose
32
+ - Node.js 20+ (for direct testing)
33
+ - PostgreSQL 13+ (if running outside Docker)
34
+
35
+ #### Setup
36
+
37
+ ```bash
38
+ # Clone repository
39
+ git clone https://github.com/somratpro/HuggingClip.git
40
+ cd HuggingClip
41
+
42
+ # Copy environment template
43
+ cp .env.example .env
44
+
45
+ # Edit .env with your configuration
46
+ # At minimum set: HF_TOKEN for backup persistence
47
+
48
+ # Start with Docker Compose
49
+ docker-compose up -d
50
+
51
+ # Check health
52
+ curl http://localhost:7861/health
53
+
54
+ # Open dashboard
55
+ open http://localhost:7861/
56
+ ```
57
+
58
+ ## Configuration
59
+
60
+ ### Required Environment Variables
61
+
62
+ | Variable | Description | Example |
63
+ |----------|-------------|---------|
64
+ | `HF_TOKEN` | Hugging Face API token (for backup persistence) | `hf_xxxx...` |
65
+
66
+ Get your HF token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens)
67
+
68
+ ### Paperclip Configuration
69
+
70
+ | Variable | Default | Description |
71
+ |----------|---------|-------------|
72
+ | `DATABASE_URL` | `postgres://postgres:paperclip@localhost:5432/paperclip` | PostgreSQL connection string |
73
+ | `PORT` | `3100` | Paperclip API port |
74
+ | `NODE_ENV` | `production` | Node.js environment |
75
+ | `PAPERCLIP_HOME` | `/paperclip` | Paperclip data directory |
76
+ | `PAPERCLIP_DEPLOYMENT_MODE` | `authenticated` | Deployment mode (local/authenticated) |
77
+
78
+ ### Agent Provider Keys
79
+
80
+ Configure API keys for your agent providers:
81
+
82
+ ```bash
83
+ # Claude agents
84
+ CLAUDE_API_KEY=sk-ant-xxxx...
85
+
86
+ # Other LLM providers
87
+ LLM_API_KEY=xxxx...
88
+
89
+ # Allow all Claude models
90
+ OPENCODE_ALLOW_ALL_MODELS=true
91
+ ```
92
+
93
+ ### Backup Configuration
94
+
95
+ | Variable | Default | Description |
96
+ |----------|---------|-------------|
97
+ | `SYNC_INTERVAL` | `180` | Backup interval (seconds) |
98
+ | `SYNC_MAX_FILE_BYTES` | `52428800` | Max backup size (50MB) |
99
+ | `BACKUP_DATASET_NAME` | `paperclip-backup` | HF Dataset name for backups |
100
+
101
+ ### Optional: Cloudflare Proxy
102
+
103
+ Enable outbound connections to blocked domains (Telegram, Discord, WhatsApp, etc.):
104
+
105
+ ```bash
106
+ CLOUDFLARE_WORKERS_TOKEN=xxx # From https://dash.cloudflare.com/
107
+ CLOUDFLARE_ACCOUNT_ID=xxx # From Cloudflare dashboard
108
+ ```
109
+
110
+ See [cloudflare-proxy-setup.py](cloudflare-proxy-setup.py) for details.
111
+
112
+ ### Optional: Authentication
113
+
114
+ ```bash
115
+ BETTER_AUTH_SECRET=your-random-secret # Generate: openssl rand -base64 32
116
+ DISCORD_WEBHOOK_URL=https://... # For admin notifications
117
+ ```
118
+
119
+ ## Deployment
120
+
121
+ ### Hugging Face Spaces
122
+
123
+ 1. **Create Space**: [huggingface.co/new-space](https://huggingface.co/new-space)
124
+ - **Space name**: `huggingclip`
125
+ - **Space type**: Public (or Private)
126
+ - **Runtime**: Docker
127
+
128
+ 2. **Configure Secrets**: Settings → Repository Secrets
129
+ - `HF_TOKEN`: Your Hugging Face API token
130
+ - `CLAUDE_API_KEY`: Claude API key (if using Claude agents)
131
+ - Add any other provider keys needed
132
+
133
+ 3. **Deploy**: Push to the Space repo or use the web editor
134
+
135
+ 4. **Monitor**: Dashboard appears at `https://your-username-huggingclip.hf.space/`
136
+
137
+ ### Docker
138
+
139
+ ```bash
140
+ # Build
141
+ docker build -t huggingclip .
142
+
143
+ # Run
144
+ docker run -d \
145
+ -p 7861:7861 \
146
+ -e HF_TOKEN=hf_xxxx... \
147
+ -e CLAUDE_API_KEY=sk-ant-xxxx... \
148
+ -e DATABASE_URL=postgres://... \
149
+ -v paperclip_data:/paperclip \
150
+ huggingclip
151
+ ```
152
+
153
+ ### Docker Compose (Development)
154
+
155
+ ```bash
156
+ docker-compose up -d
157
+ ```
158
+
159
+ ## Usage
160
+
161
+ ### Dashboard
162
+
163
+ Access the health monitoring dashboard at: `http://your-space-url/`
164
+
165
+ **Shows:**
166
+ - Paperclip service status (running/down)
167
+ - Database health & last backup timestamp
168
+ - System uptime & start time
169
+ - Quick links to Paperclip UI and API
170
+
171
+ ### Paperclip UI
172
+
173
+ Full Paperclip interface at: `http://your-space-url/app/`
174
+
175
+ **Features:**
176
+ - Create companies and organizational structures
177
+ - Recruit AI agents with specific roles
178
+ - Define tasks and monitor execution
179
+ - View conversation logs and agent decisions
180
+ - Manage budgets and costs
181
+ - Approve/override agent actions
182
+
183
+ ### API
184
+
185
+ Direct API access at: `http://your-space-url/api/*`
186
+
187
+ Examples:
188
+ ```bash
189
+ # Get API status
190
+ curl http://localhost:7861/health
191
+
192
+ # Check dashboard data
193
+ curl http://localhost:7861/dashboard/status
194
+
195
+ # Access Paperclip API
196
+ curl http://localhost:7861/api/companies
197
+ ```
198
+
199
+ ## Data Persistence
200
+
201
+ ### Automatic Backup
202
+
203
+ HuggingClip automatically backs up your Paperclip database every 180 seconds (configurable):
204
+
205
+ 1. **Database dump** - PostgreSQL SQL format
206
+ 2. **Paperclip data** - Config files, plugins, etc.
207
+ 3. **Upload to HF** - Stored in your `paperclip-backup` Dataset
208
+ 4. **On restart** - Data automatically restored
209
+
210
+ You can view backups at: `https://huggingface.co/datasets/your-username/paperclip-backup`
211
+
212
+ ### Manual Backup
213
+
214
+ ```bash
215
+ # From inside container
216
+ python3 /app/paperclip-sync.py sync
217
+ ```
218
+
219
+ ### Manual Restore
220
+
221
+ ```bash
222
+ # From inside container
223
+ python3 /app/paperclip-sync.py restore
224
+ ```
225
+
226
+ ## Troubleshooting
227
+
228
+ ### Database Connection Failed
229
+
230
+ **Problem**: "Cannot connect to PostgreSQL"
231
+
232
+ **Solution:**
233
+ 1. Check DATABASE_URL is correct: `postgres://user:pass@host:port/db`
234
+ 2. Verify PostgreSQL is running: `docker ps | grep postgres`
235
+ 3. Check credentials in DATABASE_URL match PostgreSQL setup
236
+ 4. Wait 10-30s for PostgreSQL to initialize on first startup
237
+
238
+ ### Backup Not Uploading
239
+
240
+ **Problem**: "Sync status shows error"
241
+
242
+ **Solution:**
243
+ 1. Verify `HF_TOKEN` is set and valid
244
+ 2. Check HF Dataset is created: `huggingface-cli repo info datasets/your-username/paperclip-backup`
245
+ 3. Look at container logs: `docker logs huggingclip-app`
246
+ 4. Run manual backup: `python3 paperclip-sync.py sync`
247
+
248
+ ### Paperclip Not Accessible
249
+
250
+ **Problem**: Can't reach http://localhost:7861/app/
251
+
252
+ **Solution:**
253
+ 1. Check container is running: `docker ps`
254
+ 2. Check ports are exposed: `docker port huggingclip-app`
255
+ 3. Verify port 3100 is not blocked
256
+ 4. Check health: `curl http://localhost:7861/health`
257
+ 5. Look at Paperclip logs: Search container logs for errors
258
+
259
+ ### Space Keeps Restarting
260
+
261
+ **Problem**: Container exits repeatedly
262
+
263
+ **Solution:**
264
+ 1. Check logs: `docker logs --tail=100 huggingclip-app`
265
+ 2. Common causes:
266
+ - Invalid DATABASE_URL
267
+ - Missing required env vars
268
+ - PostgreSQL not responding
269
+ - Out of memory (HF free tier is limited)
270
+ 3. Verify all required env vars are set correctly
271
+
272
+ ### Out of Memory
273
+
274
+ **Problem**: "Killed" message or container restarts
275
+
276
+ **Solution:**
277
+ 1. HF Spaces free tier: 2 vCPU, 16GB RAM, 50GB storage
278
+ 2. Reduce backup frequency: `SYNC_INTERVAL=600` (every 10 min instead of 3)
279
+ 3. Reduce database size: Archive old agent runs and conversations
280
+ 4. Use upgraded HF Space (Pro) for more resources
281
+
282
+ ## Architecture
283
+
284
+ ### Components
285
+
286
+ 1. **Health Server** (Node.js, port 7861)
287
+ - Public gateway + dashboard
288
+ - Proxies requests to Paperclip
289
+ - Health checks for monitoring
290
+
291
+ 2. **Paperclip** (Node.js, port 3100)
292
+ - Main AI agent orchestration app
293
+ - React UI + REST API
294
+ - PostgreSQL database
295
+
296
+ 3. **PostgreSQL** (port 5432)
297
+ - Stores companies, agents, tasks, conversations
298
+ - Embedded in container
299
+ - Synced to HF Dataset
300
+
301
+ 4. **Sync Worker** (Python)
302
+ - Periodic backup to Hugging Face
303
+ - Restore on startup
304
+ - Handles database persistence
305
+
306
+ 5. **Cloudflare Proxy** (Optional)
307
+ - Bypasses HF Spaces network blocks
308
+ - Routes outbound API calls
309
+ - Auto-provisioned if token provided
310
+
311
+ ### Data Flow
312
+
313
+ ```
314
+ ┌─────────────────┐
315
+ │  Paperclip UI   │  (http://space-url/app/)
316
+ │  & REST API     │
317
+ └────────┬────────┘
318
+          │
319
+     (port 3100)
320
+          │
321
+ ┌────────▼────────┐     Every 180s       ┌──────────────────┐
322
+ │  Health Server  │────────────────────▶ │ Sync to HF       │
323
+ │  (7861)         │                      │ (PostgreSQL dump)│
324
+ └────────┬────────┘                      └──────────────────┘
325
+          │                                        │
326
+          │                                        ▼
327
+          │                         ┌──────────────────────┐
328
+          └────────────────────────▶│  HF Dataset Backup   │
329
+                                    │  paperclip-backup    │
330
+          ◀─────────────────────────│                      │
331
+          │    (on restart)         └──────────────────────┘
332
+          ▼
333
+    ┌────────────┐
334
+    │ PostgreSQL │
335
+    │ /paperclip │
336
+    └────────────┘
337
+ ```
338
+
339
+ ## Backup Retention
340
+
341
+ HuggingClip stores only the **latest backup** in HF Dataset (`snapshots/latest.tar.gz`).
342
+
343
+ **To keep multiple backups manually:**
344
+ ```bash
345
+ # Download backup from HF (repo id has no "datasets/" prefix when --repo-type is given)
346
+ huggingface-cli download your-username/paperclip-backup \
347
+ snapshots/latest.tar.gz --repo-type dataset --local-dir .
348
+
349
+ # Save a copy
350
+ cp snapshots/latest.tar.gz paperclip-backup-$(date +%Y%m%d-%H%M%S).tar.gz
351
+ ```
352
+
353
+ ## Monitoring
354
+
355
+ ### UptimeRobot Integration
356
+
357
+ Prevent HF Spaces from sleeping (free tier auto-suspends idle Spaces):
358
+
359
+ 1. Create UptimeRobot account: [uptimerobot.com](https://uptimerobot.com)
360
+ 2. Add monitor: `https://your-space-url/health` (HTTP check every 5 min)
361
+ 3. Configure alerts in HuggingClip:
362
+ - `POST /dashboard/uptimerobot/setup` with webhook URL
363
+ 4. UptimeRobot will ping your Space regularly, preventing sleep
364
+
365
+ ### Health Check
366
+
367
+ The `/health` endpoint returns JSON with full service status:
368
+
369
+ ```bash
370
+ curl -s http://localhost:7861/health | jq .
371
+ ```
372
+
373
+ Response includes:
374
+ - Service uptime
375
+ - Database status
376
+ - Last backup timestamp
377
+ - Any errors
378
+
379
+ ## Contributing
380
+
381
+ Found a bug? Want to improve HuggingClip?
382
+
383
+ 1. Check [HuggingClip issues](https://github.com/somratpro/HuggingClip/issues)
384
+ 2. Submit PR with:
385
+ - Clear description of changes
386
+ - Any needed documentation updates
387
+ - Test of changes locally
388
+
389
+ ## License
390
+
391
+ MIT License - see [LICENSE](LICENSE) file
392
+
393
+ ## Resources
394
+
395
+ - **Paperclip**: [paperclip.ing](https://paperclip.ing/) | [GitHub](https://github.com/paperclipai/paperclip)
396
+ - **Documentation**: [docs.paperclip.ing](https://docs.paperclip.ing)
397
+ - **HuggingClip**: [GitHub](https://github.com/somratpro/HuggingClip)
398
+
399
+ ## Support
400
+
401
+ - 📖 [Paperclip Docs](https://docs.paperclip.ing)
402
+ - 💬 [Paperclip Discord](https://discord.gg/paperclipai)
403
+ - 🐛 [Report Issues](https://github.com/somratpro/HuggingClip/issues)
404
+
405
+ ---
406
+
407
+ Made with ❤️ for the AI agent community
SECURITY.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ |---------|-----------|
7
+ | 1.x | ✅ Yes |
8
+
9
+ ## Reporting a Vulnerability
10
+
11
+ **Do NOT open a public issue for security vulnerabilities.**
12
+
13
+ Email: security@somratpro.com (or create a private GitHub security advisory)
14
+
15
+ Include:
16
+ - Description of the vulnerability
17
+ - Steps to reproduce
18
+ - Potential impact
19
+ - Suggested fix (if any)
20
+
21
+ We will respond within 48 hours and aim to patch critical issues within 7 days.
22
+
23
+ ## Security Best Practices
24
+
25
+ ### Secrets Management
26
+
27
+ - **Never commit secrets to git** — use HF Space secrets or environment variables
28
+ - `HF_TOKEN`: Store as HF Space secret, not in code
29
+ - `CLAUDE_API_KEY`, `LLM_API_KEY`: Same — HF Space secrets only
30
+ - `BETTER_AUTH_SECRET`: Generate strong random secret (`openssl rand -base64 32`)
31
+ - Rotate tokens if accidentally exposed
32
+
33
+ ### Network Security
34
+
35
+ - `umask 0077` enforced at startup — all files created owner-only
36
+ - Cloudflare proxy uses shared secret for authentication
37
+ - No hardcoded credentials anywhere in codebase
38
+
39
+ ### Database Security
40
+
41
+ - PostgreSQL runs locally inside container — not exposed externally
42
+ - HF Dataset backups are **private by default**
43
+ - Backup file contains all database data — protect your HF Dataset access
44
+
45
+ ### API Security
46
+
47
+ - Paperclip API runs on port 3100 (internal only)
48
+ - Port 7861 exposes health dashboard and proxied access only
49
+ - Configure `BETTER_AUTH_SECRET` for production authentication
50
+ - Use `PAPERCLIP_DEPLOYMENT_MODE=authenticated` for public-facing deployments
51
+
52
+ ### Container Security
53
+
54
+ - Based on `node:lts-trixie-slim` (minimal attack surface)
55
+ - No root process execution where avoidable
56
+ - Regular base image updates recommended
57
+
58
+ ## Known Limitations
59
+
60
+ - HF Spaces free tier is public — anyone can access your Paperclip UI unless auth is configured
61
+ - Database backup stored in HF Dataset — ensure dataset is **private**
62
+ - Cloudflare Worker proxy can access proxied traffic — review before enabling
cloudflare-proxy-setup.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ import secrets
7
+ import sys
8
+ import urllib.error
9
+ import urllib.request
10
+ from pathlib import Path
11
+
12
+ API_BASE = "https://api.cloudflare.com/client/v4"
13
+ ENV_FILE = Path("/tmp/huggingclaw-cloudflare-proxy.env")
14
+ DEFAULT_ALLOWED = [
15
+ "api.telegram.org",
16
+ "discord.com",
17
+ "discordapp.com",
18
+ "gateway.discord.gg",
19
+ "status.discord.com",
20
+ "web.whatsapp.com",
21
+ "graph.facebook.com",
22
+ "googleapis.com",
23
+ "google.com",
24
+ "googleusercontent.com",
25
+ "gstatic.com",
26
+ ]
27
+
28
+
29
def cf_request(method: str, path: str, token: str, body: bytes | None = None, content_type: str = "application/json"):
    """Call the Cloudflare v4 API once and return the ``result`` member of its reply.

    Args:
        method: HTTP verb, e.g. ``"GET"`` or ``"PUT"``.
        path: API path appended to ``API_BASE`` (must start with ``/``).
        token: Cloudflare API token, sent as a ``Bearer`` credential.
        body: Optional raw request payload.
        content_type: Value of the ``Content-Type`` request header.

    Returns:
        The decoded ``result`` value from the JSON response envelope.

    Raises:
        urllib.error.HTTPError: propagated from ``urlopen`` for HTTP error statuses.
        RuntimeError: the envelope did not report ``success``; the message is
            the first API error message, or a generic fallback.
    """
    fallback_message = "Unknown Cloudflare API error"
    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": content_type,
    }
    request = urllib.request.Request(
        f"{API_BASE}{path}", data=body, method=method, headers=request_headers
    )
    with urllib.request.urlopen(request, timeout=30) as reply:
        raw = reply.read()
    envelope = json.loads(raw.decode("utf-8"))
    if envelope.get("success"):
        return envelope["result"]
    problems = envelope.get("errors") or [{"message": fallback_message}]
    raise RuntimeError(problems[0].get("message", fallback_message))
45
+
46
+
47
def slugify(value: str) -> str:
    """Turn an arbitrary string into a lowercase, dash-separated slug.

    Runs of characters outside ``a-z``, ``0-9`` and ``-`` become a single
    dash, leading/trailing dashes are removed and repeated dashes collapse.
    An empty result falls back to ``"huggingclaw-proxy"``. The slug is cut
    to at most 63 characters and never ends with a dash.
    """
    dashed = re.sub(r"[^a-z0-9-]+", "-", value.lower())
    collapsed = re.sub(r"-{2,}", "-", dashed.strip("-"))
    slug = collapsed or "huggingclaw-proxy"
    return slug[:63].rstrip("-")
53
+
54
+
55
def derive_worker_name() -> str:
    """Pick the Cloudflare Worker script name from the environment.

    Precedence:
      1. ``CLOUDFLARE_WORKER_NAME`` (slugified) when set and non-blank.
      2. ``SPACE_HOST`` with ``.hf.space`` removed and ``-proxy`` appended
         (slugified) when set and non-blank.
      3. The literal ``"huggingclaw-proxy"``.
    """
    configured = os.environ.get("CLOUDFLARE_WORKER_NAME", "").strip()
    if configured:
        return slugify(configured)

    host = os.environ.get("SPACE_HOST", "").strip()
    if not host:
        return "huggingclaw-proxy"
    return slugify(host.replace(".hf.space", "") + "-proxy")
64
+
65
+
66
def render_worker(secret_value: str, allowed_targets: list[str], allow_proxy_all: bool) -> str:
    """Render the JavaScript source of the Cloudflare Worker proxy.

    Args:
        secret_value: Shared secret baked into the worker. When non-empty the
            worker requires it in ``x-proxy-key`` (or ``proxy_key``), except
            for ``/bot...`` paths without a target host, which are forwarded
            to ``api.telegram.org`` without the secret.
        allowed_targets: Domains the worker may forward to when
            ``allow_proxy_all`` is false. A host matches an entry exactly or
            as a dot-separated subdomain of it.
        allow_proxy_all: When true the worker forwards to any host.

    Returns:
        The complete worker script as a string.
    """
    # The worker lowercases the requested host before comparing, so the
    # embedded allowlist must be lowercase (and free of blanks) to ever match.
    normalized_targets = [
        entry.strip().lower() for entry in allowed_targets if entry and entry.strip()
    ]
    allowed_json = json.dumps(normalized_targets)
    allow_all_js = "true" if allow_proxy_all else "false"
    secret_json = json.dumps(secret_value)
    return f"""addEventListener("fetch", (event) => {{
  event.respondWith(handleRequest(event.request));
}});

const PROXY_SHARED_SECRET = {secret_json};
const ALLOW_PROXY_ALL = {allow_all_js};
const ALLOWED_TARGETS = {allowed_json};

function isAllowedHost(hostname) {{
  const normalized = String(hostname || "").trim().toLowerCase();
  if (!normalized) return false;
  if (ALLOW_PROXY_ALL) return true;
  return ALLOWED_TARGETS.some(
    (domain) => normalized === domain || normalized.endsWith(`.${{domain}}`),
  );
}}

// A target must be a bare host: anything carrying a path, userinfo, query or
// backslash parses to a different URL host and is rejected. Without this,
// "evil.com/.google.com" passes the suffix check but is fetched from evil.com.
function isWellFormedHost(rawHost) {{
  try {{
    return new URL(`https://${{rawHost}}`).host === String(rawHost).trim().toLowerCase();
  }} catch (error) {{
    return false;
  }}
}}

async function handleRequest(request) {{
  const url = new URL(request.url);
  const queryTarget = url.searchParams.get("proxy_target");
  const targetHost = request.headers.get("x-target-host") || queryTarget;

  if (PROXY_SHARED_SECRET) {{
    const providedSecret = request.headers.get("x-proxy-key") || url.searchParams.get("proxy_key") || "";
    if (providedSecret !== PROXY_SHARED_SECRET) {{
      if (url.pathname.startsWith("/bot") && !targetHost) {{
        // Allowed fallback
      }} else {{
        return new Response("Unauthorized: Invalid proxy key", {{ status: 401 }});
      }}
    }}
  }}

  let targetBase = "";
  if (targetHost) {{
    if (!isWellFormedHost(targetHost)) {{
      return new Response("Invalid request: Malformed target host.", {{ status: 400 }});
    }}
    if (!isAllowedHost(targetHost)) {{
      return new Response(`Forbidden: Host ${{targetHost}} is not allowed.`, {{ status: 403 }});
    }}
    targetBase = `https://${{targetHost}}`;
  }} else if (url.pathname.startsWith("/bot")) {{
    targetBase = "https://api.telegram.org";
  }} else {{
    return new Response("Invalid request: No target host provided.", {{ status: 400 }});
  }}

  const cleanSearch = new URLSearchParams(url.search);
  cleanSearch.delete("proxy_target");
  cleanSearch.delete("proxy_key");
  const searchStr = cleanSearch.toString();
  const targetUrl = targetBase + url.pathname + (searchStr ? `?${{searchStr}}` : "");

  const headers = new Headers(request.headers);
  headers.delete("cf-connecting-ip");
  headers.delete("cf-ray");
  headers.delete("cf-visitor");
  headers.delete("host");
  headers.delete("x-real-ip");
  headers.delete("x-target-host");
  headers.delete("x-proxy-key");

  const proxiedRequest = new Request(targetUrl, {{
    method: request.method,
    headers,
    body: request.body,
    redirect: "follow",
  }});

  try {{
    return await fetch(proxiedRequest);
  }} catch (error) {{
    return new Response(`Proxy Error: ${{error.message}}`, {{ status: 502 }});
  }}
}}
"""
144
+
145
+
146
def write_env(proxy_url: str, proxy_secret: str) -> None:
    """Write the proxy URL and shared secret to ``ENV_FILE`` as shell exports.

    The file is meant to be sourced by a shell, so each value is escaped with
    ``shlex.quote``: a value containing ``"``, ``$``, backticks or whitespace
    is stored literally instead of being expanded or executed when sourced.
    An empty value is written as ``''``.

    Args:
        proxy_url: Value for ``CLOUDFLARE_PROXY_URL``.
        proxy_secret: Value for ``CLOUDFLARE_PROXY_SECRET`` (may be empty).
    """
    import shlex  # local import: only this function needs it

    exports = (
        ("CLOUDFLARE_PROXY_URL", proxy_url),
        ("CLOUDFLARE_PROXY_SECRET", proxy_secret),
    )
    content = "".join(f"export {name}={shlex.quote(value)}\n" for name, value in exports)
    ENV_FILE.write_text(content, encoding="utf-8")
    # Belt-and-suspenders: even with umask 0077 on the parent shell, force
    # 0600 since the file holds the worker shared secret.
    try:
        ENV_FILE.chmod(0o600)
    except OSError:
        # Best effort only: the file has already been written.
        pass
163
+
164
+
165
def main() -> int:
    """Reuse or provision the Cloudflare Worker proxy and record its settings.

    Steps, in order:
      * ``CLOUDFLARE_PROXY_URL`` set: write the env file from the supplied
        URL/secret (warning on stderr when the secret is empty) and stop.
      * No ``CLOUDFLARE_WORKERS_TOKEN``: nothing to do.
      * Otherwise: resolve the account and workers.dev subdomain, upload the
        rendered worker, enable its subdomain route and write the env file.

    Returns:
        0 on success or when there is nothing to do, 1 when provisioning
        failed (a diagnostic is printed to stderr).
    """
    env = os.environ
    preset_url = env.get("CLOUDFLARE_PROXY_URL", "").strip()
    preset_secret = env.get("CLOUDFLARE_PROXY_SECRET", "").strip()
    workers_token = env.get("CLOUDFLARE_WORKERS_TOKEN", "").strip()

    if preset_url:
        # Always write the env file so the shell that sources it has
        # CLOUDFLARE_PROXY_URL set even when no secret was supplied. An empty
        # secret means no x-proxy-key header is sent, which only works if the
        # deployed worker also has no secret baked in.
        write_env(preset_url, preset_secret)
        if not preset_secret:
            print(
                "Warning: CLOUDFLARE_PROXY_URL is set but CLOUDFLARE_PROXY_SECRET "
                "is empty. Requests will succeed only if the deployed worker "
                "was built without PROXY_SHARED_SECRET; otherwise you'll see "
                "401 Unauthorized.",
                file=sys.stderr,
            )
        return 0

    if not workers_token:
        return 0

    account = env.get("CLOUDFLARE_ACCOUNT_ID", "").strip()
    try:
        if not account:
            # Auto-discover: use the first account visible to the token.
            visible_accounts = cf_request("GET", "/accounts", workers_token)
            if not visible_accounts:
                raise RuntimeError("No Cloudflare account available for this token.")
            account = visible_accounts[0]["id"]

        subdomain_reply = cf_request(
            "GET", f"/accounts/{account}/workers/subdomain", workers_token
        )
        workers_subdomain = (subdomain_reply or {}).get("subdomain", "").strip()
        if not workers_subdomain:
            raise RuntimeError(
                "Cloudflare Workers subdomain is not configured. Enable workers.dev in your Cloudflare account first."
            )

        script_name = derive_worker_name()
        domains_raw = env.get("CLOUDFLARE_PROXY_DOMAINS", "").strip()
        # Blank or "*" means "proxy everything"; the default list is still
        # embedded in the worker in that case.
        allow_everything = not domains_raw or domains_raw == "*"
        if allow_everything:
            targets = DEFAULT_ALLOWED
        else:
            targets = [piece.strip() for piece in domains_raw.split(",") if piece.strip()]
        shared_secret = preset_secret or secrets.token_urlsafe(24)
        script_source = render_worker(shared_secret, targets, allow_everything)

        script_path = f"/accounts/{account}/workers/scripts/{script_name}"
        cf_request(
            "PUT",
            script_path,
            workers_token,
            body=script_source.encode("utf-8"),
            content_type="application/javascript",
        )
        cf_request(
            "POST",
            f"{script_path}/subdomain",
            workers_token,
            body=json.dumps({"enabled": True, "previews_enabled": True}).encode("utf-8"),
        )

        write_env(f"https://{script_name}.{workers_subdomain}.workers.dev", shared_secret)
        return 0
    except urllib.error.HTTPError as http_failure:
        response_text = http_failure.read().decode("utf-8", errors="replace")
        wrong_token_kind = http_failure.code == 403 and '"code":9109' in response_text
        if wrong_token_kind:
            print(
                "Cloudflare proxy setup failed: invalid Workers token. "
                "Use a Cloudflare API Token in CLOUDFLARE_WORKERS_TOKEN "
                "(not a Global API Key, tunnel token, or worker secret). "
                "For auto-setup, it should have account-level 'Workers Scripts: Edit'. "
                "The setup can auto-discover your account; CLOUDFLARE_ACCOUNT_ID is not required.",
                file=sys.stderr,
            )
        print(f"Cloudflare proxy setup failed: HTTP {http_failure.code} {response_text}", file=sys.stderr)
        return 1
    except Exception as failure:
        print(f"Cloudflare proxy setup failed: {failure}", file=sys.stderr)
        return 1
250
+
251
+
252
+ if __name__ == "__main__":
253
+ raise SystemExit(main())
cloudflare-proxy.js ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Cloudflare Proxy: Transparent Fix for Blocked Domains
3
+ *
4
+ * Patches https.request/http.request/fetch and undici to redirect traffic
5
+ * for blocked hosts through a Cloudflare Worker proxy.
6
+ */
7
+ "use strict";
8
+
9
+ const https = require("https");
10
+ const http = require("http");
11
+
12
+ // Use stderr for logs to avoid breaking child processes that communicate via stdout JSON
13
+ const log = (...args) => console.error(...args);
14
+
15
/**
 * Split a comma-separated domain list into trimmed, lowercase entries.
 * Hostnames are lowercased before they are compared against this list, so
 * the entries must be lowercase too or mixed-case config would never match.
 *
 * @param {string} raw Comma-separated domains (may be empty).
 * @returns {string[]} Non-empty, lowercase entries in their original order.
 */
function parseProxyDomains(raw) {
  return String(raw || "")
    .split(",")
    .map((domain) => domain.trim().toLowerCase())
    .filter(Boolean);
}

// Proxy endpoint; a bare host such as "name.workers.dev" is assumed to be https.
let PROXY_URL = process.env.CLOUDFLARE_PROXY_URL;
if (
  PROXY_URL &&
  !PROXY_URL.startsWith("http://") &&
  !PROXY_URL.startsWith("https://")
) {
  PROXY_URL = `https://${PROXY_URL}`;
}

const DEBUG = process.env.CLOUDFLARE_PROXY_DEBUG === "true";
const PROXY_SHARED_SECRET = (process.env.CLOUDFLARE_PROXY_SECRET || "").trim();
// A blank or whitespace-only value means "proxy everything", the same as "*".
const PROXY_DOMAINS = (process.env.CLOUDFLARE_PROXY_DOMAINS || "").trim() || "*";
const BLOCKED_DOMAINS = parseProxyDomains(PROXY_DOMAINS);
const PROXY_ALL = PROXY_DOMAINS === "*";
31
+
32
+ if (PROXY_URL) {
33
+ try {
34
+ const proxy = new URL(PROXY_URL);
35
+ const originalHttpsRequest = https.request;
36
+ const originalHttpRequest = http.request;
37
+ const originalFetch =
38
+ typeof globalThis.fetch === "function" ? globalThis.fetch.bind(globalThis) : null;
39
+
40
// Decide whether traffic for `hostname` must be rerouted through the proxy.
// Wildcard mode proxies everything except loopback addresses, the proxy
// itself and Hugging Face hosts; list mode proxies only the configured
// domains and their subdomains.
const shouldProxyHost = (hostname) => {
  const host = String(hostname || "").trim().toLowerCase();
  if (!host) return false;

  if (!PROXY_ALL) {
    return BLOCKED_DOMAINS.some(
      (domain) => host === domain || host.endsWith(`.${domain}`),
    );
  }

  const exactInternalHosts = [
    "localhost",
    "127.0.0.1",
    "::1",
    "0.0.0.0",
    proxy.hostname,
    "huggingface.co",
  ];
  const internal =
    exactInternalHosts.includes(host) ||
    host.endsWith(".hf.space") ||
    host.endsWith(".huggingface.co");
  return !internal;
};
61
+
62
// Build a replacement for http(s).request that reroutes matching hosts to the
// proxy. `original` is the untouched request function of the module being
// patched; `originalModuleName` is only used in debug output.
const patch = (original, originalModuleName) =>
  function patchedRequest(arg1, arg2, arg3) {
    // Normalise the (url[, options][, cb]) and (options[, cb]) call shapes.
    let options;
    let callback;
    const firstIsUrl = typeof arg1 === "string" || arg1 instanceof URL;
    if (!firstIsUrl) {
      options = { ...arg1 };
      callback = arg2;
    } else {
      const parsed = typeof arg1 === "string" ? new URL(arg1) : arg1;
      const fromUrl = {
        protocol: parsed.protocol,
        hostname: parsed.hostname,
        port: parsed.port,
        path: parsed.pathname + parsed.search,
      };
      const hasOptionsArg = typeof arg2 === "object" && arg2 !== null;
      options = hasOptionsArg ? { ...fromUrl, ...arg2 } : fromUrl;
      callback = hasOptionsArg ? arg3 : arg2;
    }

    let hostname = options.hostname;
    if (!hostname) {
      hostname = options.host ? String(options.host).split(":")[0] : "";
    }
    const callerHeaders = options.headers || {};
    const taggedForProxy =
      callerHeaders["x-target-host"] || callerHeaders["X-Target-Host"];

    // Untouched hosts, and requests already rewritten once, go straight through.
    if (!shouldProxyHost(hostname) || options._proxied || taggedForProxy) {
      return original.call(this, arg1, arg2, arg3);
    }

    if (DEBUG) {
      const path = options.path || "/";
      log(
        `[cloudflare-proxy] Redirecting ${originalModuleName}://${hostname}${path} -> ${proxy.hostname}`,
      );
    }

    const rerouted = {
      ...options,
      _proxied: true,
      protocol: "https:",
      hostname: proxy.hostname,
      port: proxy.port || 443,
      servername: proxy.hostname,
      headers: {
        ...(options.headers || {}),
        host: proxy.host,
        "x-target-host": hostname,
      },
    };
    // `host` would override `hostname`, and the caller's agent is dropped so
    // the default agent is used for the proxy connection.
    delete rerouted.host;
    delete rerouted.agent;

    if (PROXY_SHARED_SECRET) {
      rerouted.headers["x-proxy-key"] = PROXY_SHARED_SECRET;
    }

    // The hop to the proxy is always made with the original https.request.
    return originalHttpsRequest.call(https, rerouted, callback);
  };

https.request = patch(originalHttpsRequest, "https");
http.request = patch(originalHttpRequest, "http");
132
+
133
if (originalFetch) {
  /**
   * Replacement for the global fetch. Requests whose host should be proxied
   * are re-issued against the proxy with `x-target-host` (and `x-proxy-key`
   * when a secret is configured); everything else is passed to the original
   * fetch unchanged.
   *
   * @param {string|URL|Request} input Same as the first argument of fetch.
   * @param {object} [init] Same as the second argument of fetch.
   * @returns {Promise<Response>} The promise returned by the original fetch.
   */
  globalThis.fetch = async function patchedFetch(input, init) {
    const request = input instanceof Request ? input : null;
    const urlStr = request ? request.url : String(input);

    let url;
    try {
      url = new URL(urlStr);
    } catch (e) {
      // Not an absolute URL: let the original fetch deal with it.
      return originalFetch(input, init);
    }

    const hostname = url.hostname;
    const shouldProxy = shouldProxyHost(hostname);

    const mergedHeaders = new Headers(
      request ? request.headers : init?.headers || {},
    );

    // Headers lookups are case-insensitive, so one check covers every spelling.
    const alreadyProxied = mergedHeaders.has("x-target-host");

    if (!shouldProxy || alreadyProxied) {
      return originalFetch(input, init);
    }

    if (DEBUG) {
      log(
        `[cloudflare-proxy] Redirecting fetch://${hostname}${url.pathname}${url.search} -> ${proxy.hostname}`,
      );
    }

    mergedHeaders.set("x-target-host", hostname);
    if (PROXY_SHARED_SECRET) {
      mergedHeaders.set("x-proxy-key", PROXY_SHARED_SECRET);
    }

    const proxiedUrl = new URL(url.pathname + url.search, proxy);

    // Attach logging to the in-flight request without altering what the
    // caller receives: the original promise is returned untouched.
    const logProxyError = (promise, debugInfo) => {
      promise
        .then((r) => {
          if (DEBUG && !r.ok) {
            log(`[cloudflare-proxy] Proxy HTTP ${r.status} for ${hostname}: ${r.statusText}`);
          }
        })
        .catch((err) => {
          const cause = err?.cause;
          const causeStr = cause
            ? ` | cause: ${cause?.code || cause?.message || String(cause)}`
            : "";
          log(`[cloudflare-proxy] Proxy FAILED ${hostname}: ${err?.message}${causeStr}`);
          if (DEBUG && debugInfo) {
            log(`[cloudflare-proxy] Debug: ${debugInfo}`);
          }
        });
      return promise;
    };

    if (request) {
      const fetchOpts = {
        method: request.method,
        headers: mergedHeaders,
        redirect: request.redirect,
        // Forward the Request's signal so the caller's abort/timeout still
        // cancels the proxied request.
        signal: request.signal,
      };
      if (request.body) {
        fetchOpts.body = request.body;
        fetchOpts.duplex = "half";
      }
      return logProxyError(
        originalFetch(String(proxiedUrl), fetchOpts),
        `request-mode method=${request.method} hasBody=${!!request.body}`,
      );
    }

    // Build a fresh init: do NOT spread `init` because it may carry a
    // `dispatcher`/`client` pinned to the original target's connection
    // pool, which causes undici to throw UND_ERR_INVALID_ARG when we
    // change the origin. Forward only well-known fetch options.
    const newInit = {
      method: init?.method || "GET",
      headers: mergedHeaders,
    };
    if (init?.body != null) {
      newInit.body = init.body;
      if (init.body instanceof ReadableStream) {
        newInit.duplex = init.duplex || "half";
      }
    }
    if (init?.signal) newInit.signal = init.signal;
    if (init?.redirect) newInit.redirect = init.redirect;
    if (init?.credentials) newInit.credentials = init.credentials;
    if (init?.cache) newInit.cache = init.cache;
    if (init?.mode) newInit.mode = init.mode;
    if (init?.referrer) newInit.referrer = init.referrer;
    if (init?.referrerPolicy) newInit.referrerPolicy = init.referrerPolicy;
    if (init?.integrity) newInit.integrity = init.integrity;
    if (init?.keepalive != null) newInit.keepalive = init.keepalive;

    const bodyType = init?.body == null
      ? "none"
      : init.body instanceof ReadableStream
        ? "ReadableStream"
        : (init.body?.constructor?.name || typeof init.body);

    return logProxyError(
      originalFetch(String(proxiedUrl), newInit),
      `init-mode method=${newInit.method} body=${bodyType} initKeys=${Object.keys(init || {}).join(",")}`,
    );
  };
}
247
+
248
+ // undici patching
249
+ const patchUndiciInstance = (exports) => {
250
+ if (!exports) return;
251
+
252
+ const patchDispatch = (proto, name) => {
253
+ if (proto && proto.dispatch && !proto.dispatch._patched) {
254
+ const origDispatch = proto.dispatch;
255
+ proto.dispatch = function(options, handler) {
256
+ let origin = options.origin || this.origin;
257
+ if (origin && typeof origin !== 'string') {
258
+ try { origin = origin.origin || origin.toString(); } catch (e) { origin = ""; }
259
+ }
260
+
261
+ let hostname = "";
262
+ try {
263
+ hostname = new URL(String(origin)).hostname;
264
+ } catch(e) {
265
+ hostname = String(origin || "").split(':')[0];
266
+ }
267
+
268
+ if (hostname && shouldProxyHost(hostname)) {
269
+ if (DEBUG) log(`[cloudflare-proxy] Redirecting undici ${name}.dispatch: ${hostname}${options.path || ""} -> ${proxy.hostname}`);
270
+
271
+ const targetHeader = "x-target-host";
272
+ const secretHeader = "x-proxy-key";
273
+
274
+ if (Array.isArray(options.headers)) {
275
+ let foundTarget = false;
276
+ for (let i = 0; i < options.headers.length; i += 2) {
277
+ if (String(options.headers[i]).toLowerCase() === targetHeader) {
278
+ foundTarget = true;
279
+ break;
280
+ }
281
+ }
282
+ if (!foundTarget) {
283
+ options.headers.push(targetHeader, hostname);
284
+ if (PROXY_SHARED_SECRET) options.headers.push(secretHeader, PROXY_SHARED_SECRET);
285
+ }
286
+ } else {
287
+ options.headers = options.headers || {};
288
+ if (options.headers instanceof Map || (typeof options.headers.set === 'function')) {
289
+ options.headers.set(targetHeader, hostname);
290
+ if (PROXY_SHARED_SECRET) options.headers.set(secretHeader, PROXY_SHARED_SECRET);
291
+ } else {
292
+ options.headers[targetHeader] = hostname;
293
+ if (PROXY_SHARED_SECRET) options.headers[secretHeader] = PROXY_SHARED_SECRET;
294
+ }
295
+ }
296
+ options.origin = `https://${proxy.hostname}`;
297
+ }
298
+ return origDispatch.call(this, options, handler);
299
+ };
300
+ proto.dispatch._patched = true;
301
+ }
302
+ };
303
+
304
+ for (const key in exports) {
305
+ if (exports[key] && exports[key].prototype && typeof exports[key].prototype.dispatch === 'function') {
306
+ patchDispatch(exports[key].prototype, key);
307
+ }
308
+ }
309
+
310
+ if (exports.getGlobalDispatcher) {
311
+ try {
312
+ const globalDispatcher = exports.getGlobalDispatcher();
313
+ if (globalDispatcher && globalDispatcher.dispatch && !globalDispatcher.dispatch._patched) {
314
+ patchDispatch(globalDispatcher, "GlobalDispatcherInstance");
315
+ }
316
+ } catch (e) {}
317
+ }
318
+
319
+ // Also patch Agent and other potentially unexported classes if they have dispatch
320
+ if (exports.Agent && exports.Agent.prototype) patchDispatch(exports.Agent.prototype, "Agent");
321
+ if (exports.Pool && exports.Pool.prototype) patchDispatch(exports.Pool.prototype, "Pool");
322
+ if (exports.Client && exports.Client.prototype) patchDispatch(exports.Client.prototype, "Client");
323
+
324
+ if (exports.fetch && !exports.fetch._patched) {
325
+ const origFetch = exports.fetch;
326
+ exports.fetch = async function (input, init) {
327
+ // If we are calling undici.fetch, it should use our globalThis.fetch which is patched
328
+ return globalThis.fetch(input, init);
329
+ };
330
+ exports.fetch._patched = true;
331
+ }
332
+ };
333
+
334
+ // Try to require undici immediately
335
try {
  patchUndiciInstance(require("undici"));
} catch (e) {}

// Intercept require() so any undici copy is patched the moment it is loaded.
// The id must be exactly "undici" or a path with a node_modules/undici
// segment (e.g. "/foo/node_modules/undici/index.js"); a plain substring
// match on "/undici/" would also catch unrelated packages such as
// "super-undici-x".
const Module = require("module");
const originalRequire = Module.prototype.require;
const UNDICI_PATH_RE = /(?:^|\/)node_modules\/undici(?:\/|$)/;
Module.prototype.require = function (id) {
  const loaded = originalRequire.apply(this, arguments);
  const isUndici = id === "undici" || UNDICI_PATH_RE.test(id);
  if (!isUndici) {
    return loaded;
  }
  try {
    patchUndiciInstance(loaded);
  } catch (e) {}
  return loaded;
};
355
+
356
+ // Startup banner: print once across all Node spawns. Use a file marker
357
+ // because every Node process (health-server, gateway, sync subprocess)
358
+ // is spawned fresh from bash with NODE_OPTIONS=--require, so an env-var
359
+ // marker won't propagate. /tmp is per-container so it resets on rebuild.
360
if (DEBUG) {
  const mode = PROXY_ALL ? "wildcard" : "list";
  try {
    // The "wx" flag makes the write fail when the marker already exists, so
    // only the first process to get here prints the banner.
    const fs = require("fs");
    fs.writeFileSync("/tmp/.cf-proxy-banner-shown", "1", { flag: "wx" });
    log(`[cloudflare-proxy] active (${mode}) -> ${proxy.hostname}`);
  } catch (_) {
    // marker exists — banner already shown by another process
  }
}
372
+ } catch (error) {
373
+ log(`[cloudflare-proxy] Failed to initialize: ${error.message}`);
374
+ }
375
+ }
cloudflare-worker.js ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Cloudflare Worker: Universal Outbound Proxy
3
+ *
4
+ * Manual setup:
5
+ * 1. Create a Cloudflare Worker.
6
+ * 2. Paste this file and deploy it.
7
+ * 3. Use the worker URL as CLOUDFLARE_PROXY_URL.
8
+ *
9
+ * Optional worker vars:
10
+ * - PROXY_SHARED_SECRET
11
+ * - ALLOWED_TARGETS
12
+ * - ALLOW_PROXY_ALL
13
+ */
14
+
15
/**
 * Split a comma-separated string into trimmed, lowercase, non-empty entries.
 *
 * @param {*} raw Comma-separated text; any falsy value counts as empty.
 * @returns {string[]} The cleaned entries in their original order.
 */
function normalizeList(raw) {
  const entries = [];
  for (const piece of String(raw || "").split(",")) {
    const cleaned = piece.trim().toLowerCase();
    if (cleaned) {
      entries.push(cleaned);
    }
  }
  return entries;
}
21
+
22
export default {
  /**
   * Forward the incoming request to the host named in `x-target-host` (or the
   * `proxy_target` query parameter), always over https.
   *
   * Responses produced by the worker itself:
   *   401 - a secret is configured and the request did not supply it, unless
   *         it is a `/bot...` path without a target host
   *   400 - no target host, or the target is not a bare host
   *   403 - the target host is not allowed
   *   502 - the upstream fetch threw
   *
   * @param {Request} request Incoming request.
   * @param {object} env Worker bindings: PROXY_SHARED_SECRET (or
   *   CLOUDFLARE_PROXY_SECRET), ALLOWED_TARGETS, ALLOW_PROXY_ALL.
   * @returns {Promise<Response>}
   */
  async fetch(request, env) {
    const url = new URL(request.url);
    const queryTarget = url.searchParams.get("proxy_target");
    const targetHost = request.headers.get("x-target-host") || queryTarget;
    const proxySecret = (
      env.PROXY_SHARED_SECRET ||
      env.CLOUDFLARE_PROXY_SECRET ||
      ""
    ).trim();

    if (proxySecret) {
      const providedSecret = request.headers.get("x-proxy-key") || url.searchParams.get("proxy_key") || "";
      if (providedSecret !== proxySecret) {
        // Fallback: Telegram bot API calls addressed by path are let through
        // without the secret; they are only ever forwarded to api.telegram.org.
        if (url.pathname.startsWith("/bot") && !targetHost) {
          // Allowed
        } else {
          return new Response("Unauthorized: Invalid proxy key", { status: 401 });
        }
      }
    }

    const allowProxyAll =
      String(env.ALLOW_PROXY_ALL || "true").toLowerCase() === "true";
    const allowedTargets = normalizeList(
      env.ALLOWED_TARGETS || "api.telegram.org,discord.com,discordapp.com,gateway.discord.gg,status.discord.com,web.whatsapp.com,graph.facebook.com,googleapis.com,google.com,googleusercontent.com,gstatic.com",
    );

    const isAllowedHost = (hostname) => {
      const normalized = String(hostname || "")
        .trim()
        .toLowerCase();
      if (!normalized) return false;
      if (allowProxyAll) return true;
      return allowedTargets.some(
        (domain) => normalized === domain || normalized.endsWith(`.${domain}`),
      );
    };

    // A target must be a bare host: anything carrying a path, userinfo, query
    // or backslash parses to a different URL host and is rejected. Without
    // this, "evil.com/.google.com" passes the suffix check above but is
    // fetched from evil.com.
    const isWellFormedHost = (rawHost) => {
      try {
        return new URL(`https://${rawHost}`).host === String(rawHost).trim().toLowerCase();
      } catch (error) {
        return false;
      }
    };

    let targetBase = "";
    if (targetHost) {
      if (!isWellFormedHost(targetHost)) {
        return new Response("Invalid request: Malformed target host.", { status: 400 });
      }
      if (!isAllowedHost(targetHost)) {
        return new Response(`Forbidden: Host ${targetHost} is not allowed.`, { status: 403 });
      }
      targetBase = `https://${targetHost}`;
    } else if (url.pathname.startsWith("/bot")) {
      targetBase = "https://api.telegram.org";
    } else {
      return new Response("Invalid request: No target host provided.", { status: 400 });
    }

    // Strip the proxy's own control parameters before forwarding.
    const cleanSearch = new URLSearchParams(url.search);
    cleanSearch.delete("proxy_target");
    cleanSearch.delete("proxy_key");
    const searchStr = cleanSearch.toString();
    const targetUrl = targetBase + url.pathname + (searchStr ? `?${searchStr}` : "");

    // Drop Cloudflare/client metadata and the proxy control headers so they
    // are not passed on to the target.
    const headers = new Headers(request.headers);
    headers.delete("cf-connecting-ip");
    headers.delete("cf-ray");
    headers.delete("cf-visitor");
    headers.delete("host");
    headers.delete("x-real-ip");
    headers.delete("x-target-host");
    headers.delete("x-proxy-key");

    const proxiedRequest = new Request(targetUrl, {
      method: request.method,
      headers,
      body: request.body,
      redirect: "follow",
    });

    try {
      return await fetch(proxiedRequest);
    } catch (error) {
      return new Response(`Proxy Error: ${error.message}`, { status: 502 });
    }
  },
};
docker-compose.yml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: '3.8'
2
+
3
+ services:
4
+ # PostgreSQL database
5
+ postgres:
6
+ image: postgres:16-alpine
7
+ container_name: huggingclip-postgres
8
+ environment:
9
+ POSTGRES_DB: paperclip
10
+ POSTGRES_USER: postgres
11
+ POSTGRES_PASSWORD: paperclip
12
+ POSTGRES_INITDB_ARGS: "--encoding=UTF8"
13
+ ports:
14
+ - "5432:5432"
15
+ volumes:
16
+ - postgres_data:/var/lib/postgresql/data
17
+ healthcheck:
18
+ test: ["CMD-SHELL", "pg_isready -U postgres"]
19
+ interval: 10s
20
+ timeout: 5s
21
+ retries: 5
22
+ networks:
23
+ - huggingclip-network
24
+
25
+ # HuggingClip application
26
+ paperclip:
27
+ build:
28
+ context: .
29
+ dockerfile: Dockerfile
30
+ container_name: huggingclip-app
31
+ depends_on:
32
+ postgres:
33
+ condition: service_healthy
34
+ environment:
35
+ # Database
36
+ DATABASE_URL: postgres://postgres:paperclip@postgres:5432/paperclip
37
+
38
+ # Paperclip config
39
+ PORT: 3100
40
+ SERVE_UI: "true"
41
+ NODE_ENV: development
42
+ HOST: 0.0.0.0
43
+ PAPERCLIP_HOME: /paperclip
44
+ PAPERCLIP_DEPLOYMENT_MODE: local
45
+
46
+ # Agent providers (add your keys here)
47
+ # CLAUDE_API_KEY: ${CLAUDE_API_KEY:-}
48
+ # LLM_API_KEY: ${LLM_API_KEY:-}
49
+
50
+ # HF Backup (optional for local testing)
51
+ HF_TOKEN: ${HF_TOKEN:-}
52
+ HF_USERNAME: ${HF_USERNAME:-}
53
+ SYNC_INTERVAL: "180"
54
+ BACKUP_DATASET_NAME: paperclip-backup-dev
55
+
56
+ # Cloudflare (optional)
57
+ # CLOUDFLARE_WORKERS_TOKEN: ${CLOUDFLARE_WORKERS_TOKEN:-}
58
+ # CLOUDFLARE_ACCOUNT_ID: ${CLOUDFLARE_ACCOUNT_ID:-}
59
+
60
+ # Telemetry
61
+ PAPERCLIP_TELEMETRY_DISABLED: "1"
62
+ DO_NOT_TRACK: "1"
63
+
64
+ ports:
65
+ - "7861:7861" # Public health server + dashboard
66
+ - "3100:3100" # Direct Paperclip access (development only)
67
+ volumes:
68
+ - paperclip_data:/paperclip
69
+ - ./paperclip-sync.py:/app/paperclip-sync.py
70
+ - ./health-server.js:/app/health-server.js
71
+ - ./start.sh:/app/start.sh
72
+ healthcheck:
73
+ test: ["CMD", "curl", "-f", "http://localhost:7861/health"]
74
+ interval: 30s
75
+ timeout: 10s
76
+ retries: 3
77
+ start_period: 90s
78
+ networks:
79
+ - huggingclip-network
80
+ cap_add:
81
+ - NET_ADMIN # For potential network utilities
82
+
83
+ volumes:
84
+ postgres_data:
85
+ driver: local
86
+ paperclip_data:
87
+ driver: local
88
+
89
+ networks:
90
+ huggingclip-network:
91
+ driver: bridge
health-server.js ADDED
@@ -0,0 +1,683 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ const express = require('express');
2
+ const cors = require('cors');
3
+ const morgan = require('morgan');
4
+ const fs = require('fs');
5
+ const path = require('path');
6
+ const { promisify } = require('util');
7
+ const http = require('http');
8
+
9
+ const app = express();
10
+ const PORT = process.env.PORT || 7861;
11
+ const PAPERCLIP_HOST = process.env.HOST || '127.0.0.1';
12
+ const PAPERCLIP_PORT = 3100;
13
+
14
+ // Middleware
15
+ app.use(cors());
16
+ app.use(morgan('combined'));
17
+ app.use(express.json());
18
+ app.use(express.urlencoded({ extended: true }));
19
+
20
+ // ============================================================================
21
+ // Health Check Endpoint
22
+ // ============================================================================
23
// GET /health - report the state of the health server, Paperclip and the
// backup sync as JSON. Responds 503 only when building the report throws.
app.get('/health', async (req, res) => {
  try {
    const syncStatus = readSyncStatus();
    const uptime = process.uptime();

    // Try to check if Paperclip is responding
    const paperclipStatus = await checkPaperclipHealth();

    // Work out the next sync time defensively: an unparsable timestamp or
    // interval must yield null, not a RangeError from toISOString() that
    // would flip the whole health report to 503.
    const intervalSeconds = Number.parseInt(process.env.SYNC_INTERVAL || 180, 10);
    const lastSync = syncStatus.last_sync_time;
    let nextSync = null;
    if (lastSync && Number.isFinite(intervalSeconds)) {
      // Numbers are used as-is (milliseconds, as before); strings are parsed
      // as dates. NOTE(review): confirm the unit/format the sync script writes.
      const base = typeof lastSync === 'number' ? lastSync : Date.parse(lastSync);
      const candidate = new Date(base + intervalSeconds * 1000);
      if (!Number.isNaN(candidate.getTime())) {
        nextSync = candidate.toISOString();
      }
    }

    res.status(200).json({
      status: 'healthy',
      timestamp: new Date().toISOString(),
      uptime: Math.floor(uptime),
      services: {
        healthServer: {
          status: 'running',
          port: PORT,
          uptime: Math.floor(uptime)
        },
        paperclip: {
          status: paperclipStatus.status,
          port: PAPERCLIP_PORT,
          url: `http://${PAPERCLIP_HOST}:${PAPERCLIP_PORT}`
        },
        database: {
          status: syncStatus.db_status || 'unknown',
          lastSync: syncStatus.last_sync_time || null,
          lastSyncError: syncStatus.last_error || null
        }
      },
      backup: {
        enabled: process.env.SYNC_DISABLED !== 'true',
        interval: process.env.SYNC_INTERVAL || 180,
        lastSync: syncStatus.last_sync_time,
        nextSync: nextSync
      }
    });
  } catch (error) {
    res.status(503).json({
      status: 'unhealthy',
      error: error.message,
      timestamp: new Date().toISOString()
    });
  }
});
68
+
69
+ // ============================================================================
70
+ // Dashboard Route
71
+ // ============================================================================
72
// The dashboard page is served at both the root and /dashboard/.
const sendDashboard = (req, res) => {
  res.send(getDashboardHTML());
};

app.get('/', sendDashboard);
app.get('/dashboard/', sendDashboard);

// JSON snapshot used for status polling: process uptime, the raw sync
// status file contents, and the sync-related environment settings.
app.get('/dashboard/status', (req, res) => {
  const syncStatus = readSyncStatus();
  const uptimeSeconds = process.uptime();
  const startedAt = new Date(Date.now() - uptimeSeconds * 1000);

  const environment = {
    syncDisabled: process.env.SYNC_DISABLED === 'true',
    syncInterval: process.env.SYNC_INTERVAL || 180,
    paperclipHome: process.env.PAPERCLIP_HOME || '/paperclip'
  };

  res.json({
    uptime: Math.floor(uptimeSeconds),
    startTime: startedAt.toISOString(),
    syncStatus,
    environment
  });
});
95
+
96
+ // ============================================================================
97
+ // UptimeRobot Setup Route
98
+ // ============================================================================
99
/**
 * POST /dashboard/uptimerobot/setup
 *
 * Body: { webhookUrl: string } — must be an absolute http(s) URL.
 * Stores the URL in process.env.WEBHOOK_URL for the lifetime of this
 * process (it is not persisted).
 *
 * Responds 400 when webhookUrl is missing or not a valid http(s) URL.
 *
 * NOTE(review): this route is unauthenticated, and nothing in the visible
 * code reads WEBHOOK_URL afterwards — confirm both are intended.
 */
app.post('/dashboard/uptimerobot/setup', (req, res) => {
  const { webhookUrl } = req.body || {};

  if (!webhookUrl) {
    return res.status(400).json({ error: 'webhookUrl required' });
  }

  // Assigning a non-string to process.env silently coerces it (an object
  // becomes "[object Object]"), so reject anything that is not a
  // well-formed http(s) URL.
  let parsed = null;
  if (typeof webhookUrl === 'string') {
    try {
      parsed = new URL(webhookUrl);
    } catch (err) {
      parsed = null;
    }
  }
  if (!parsed || (parsed.protocol !== 'http:' && parsed.protocol !== 'https:')) {
    return res.status(400).json({ error: 'webhookUrl must be a valid http(s) URL' });
  }

  // Store webhook URL in environment
  process.env.WEBHOOK_URL = webhookUrl;

  res.json({
    success: true,
    message: 'UptimeRobot webhook configured',
    details: 'Health checks will now notify UptimeRobot to prevent sleep'
  });
});
115
+
116
+ // ============================================================================
117
+ // Reverse Proxy Routes
118
+ // ============================================================================
119
+
120
+ // Proxy all /app/* requests to Paperclip
121
/**
 * Reverse proxy: forwards /app/* to Paperclip with the /app prefix removed.
 *
 * Responds with the upstream status, headers and body, or 503 when the
 * upstream request fails.
 */
app.all('/app/*', async (req, res) => {
  // req.url keeps the query string; req.path would drop it, so
  // /app/page?x=1 used to reach Paperclip as /page. Only the leading
  // mount prefix is stripped.
  const targetPath = req.url.replace(/^\/app/i, '') || '/';
  const targetUrl = `http://${PAPERCLIP_HOST}:${PAPERCLIP_PORT}${targetPath}`;

  // Hop-by-hop / framing headers describe the upstream connection, not
  // the response re-sent here; res.send() computes its own framing.
  const skippedHeaders = new Set([
    'connection',
    'keep-alive',
    'transfer-encoding',
    'content-length'
  ]);

  try {
    const response = await proxyRequest(
      req.method,
      targetUrl,
      req.headers,
      req.body
    );

    // Copy the remaining response headers
    Object.keys(response.headers).forEach(key => {
      if (!skippedHeaders.has(key.toLowerCase())) {
        res.setHeader(key, response.headers[key]);
      }
    });

    res.status(response.statusCode).send(response.body);
  } catch (error) {
    console.error(`Proxy error: ${error.message}`);
    res.status(503).json({
      error: 'Paperclip service unavailable',
      details: error.message
    });
  }
});
147
+
148
+ // Proxy all /api/* requests to Paperclip
149
/**
 * Reverse proxy: forwards /api/* to Paperclip with the path unchanged.
 *
 * Responds with the upstream status, headers and body, or 503 when the
 * upstream request fails.
 */
app.all('/api/*', async (req, res) => {
  // req.url keeps the query string; req.path would drop it, so
  // /api/items?limit=5 used to reach Paperclip as /api/items.
  const targetPath = req.url;
  const targetUrl = `http://${PAPERCLIP_HOST}:${PAPERCLIP_PORT}${targetPath}`;

  // Hop-by-hop / framing headers describe the upstream connection, not
  // the response re-sent here; res.send() computes its own framing.
  const skippedHeaders = new Set([
    'connection',
    'keep-alive',
    'transfer-encoding',
    'content-length'
  ]);

  try {
    const response = await proxyRequest(
      req.method,
      targetUrl,
      req.headers,
      req.body
    );

    Object.keys(response.headers).forEach(key => {
      if (!skippedHeaders.has(key.toLowerCase())) {
        res.setHeader(key, response.headers[key]);
      }
    });

    res.status(response.statusCode).send(response.body);
  } catch (error) {
    console.error(`API proxy error: ${error.message}`);
    res.status(503).json({
      error: 'Paperclip API unavailable',
      details: error.message
    });
  }
});
174
+
175
+ // ============================================================================
176
+ // Default Route - Redirect to Dashboard
177
+ // ============================================================================
178
// The root route ('/') is already registered in the "Dashboard Route"
// section earlier in this file. That handler always sends a response, so
// a second app.get('/') here could never run and has been removed.
181
+
182
+ // ============================================================================
183
+ // Helper Functions
184
+ // ============================================================================
185
+
186
/**
 * Load the sync status that the backup script writes to
 * /tmp/sync-status.json.
 *
 * @returns {object} the parsed file contents, or a default "unknown"
 *   status when the file is missing or cannot be read or parsed.
 */
function readSyncStatus() {
  const statusPath = '/tmp/sync-status.json';
  const fallback = {
    db_status: 'unknown',
    last_sync_time: null,
    last_error: null,
    sync_count: 0
  };

  try {
    if (!fs.existsSync(statusPath)) {
      return fallback;
    }
    const raw = fs.readFileSync(statusPath, 'utf8');
    return JSON.parse(raw);
  } catch (error) {
    console.error('Error reading sync status:', error.message);
    return fallback;
  }
}
203
+
204
/**
 * Probe Paperclip's /health endpoint.
 *
 * Never rejects. Resolves with:
 *   { status: 'running', statusCode }   when any HTTP response arrives
 *   { status: 'unreachable', reason }   on a connection error or when no
 *                                       response arrives within 5 seconds
 *
 * @returns {Promise<object>}
 */
function checkPaperclipHealth() {
  return new Promise((resolve) => {
    const healthUrl = `http://${PAPERCLIP_HOST}:${PAPERCLIP_PORT}/health`;

    let settled = false;
    let deadline = null;
    const finish = (result) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(deadline);
      resolve(result);
    };

    const request = http.get(healthUrl, (res) => {
      // Drain the body: an unconsumed response keeps its socket busy.
      res.resume();
      finish({ status: 'running', statusCode: res.statusCode });
    });

    request.on('error', (err) => {
      finish({ status: 'unreachable', reason: err.message });
    });

    deadline = setTimeout(() => {
      finish({ status: 'unreachable', reason: 'timeout' });
      // Abort the in-flight request so it does not linger after the
      // caller has already been given an answer.
      request.destroy();
    }, 5000);
  });
}
221
+
222
/**
 * Forward one request to the Paperclip server and buffer the response.
 *
 * @param {string} method  HTTP method to use upstream.
 * @param {string} url     Absolute upstream URL.
 * @param {object} headers Incoming request headers; `host` is rewritten
 *                         to the Paperclip host:port.
 * @param {object} body    Parsed request body. A non-empty object is
 *                         re-serialised as JSON; anything else sends no
 *                         body.
 * @returns {Promise<{statusCode: number, headers: object, body: Buffer}>}
 *   `body` is a Buffer so binary and compressed payloads pass through
 *   unchanged (decoding chunks into a string corrupted them).
 *   Rejects on connection errors or after 30 s of socket inactivity.
 *
 * NOTE(review): the body is always re-serialised as JSON, even when the
 * original content-type was form-encoded — confirm upstream accepts that.
 */
function proxyRequest(method, url, headers, body) {
  return new Promise((resolve, reject) => {
    const hasBody = body && Object.keys(body).length > 0;
    const payload = hasBody ? Buffer.from(JSON.stringify(body)) : null;

    const outgoingHeaders = {
      ...headers,
      'host': `${PAPERCLIP_HOST}:${PAPERCLIP_PORT}`
    };
    // The original framing headers describe the client's raw body, not
    // the re-serialised payload. Forwarding a stale content-length makes
    // the upstream wait for bytes that never arrive.
    delete outgoingHeaders['transfer-encoding'];
    delete outgoingHeaders['content-length'];
    if (payload) {
      outgoingHeaders['content-length'] = payload.length;
    }

    const options = {
      method,
      headers: outgoingHeaders,
      timeout: 30000
    };

    const req = http.request(url, options, (res) => {
      const chunks = [];

      res.on('data', (chunk) => {
        chunks.push(chunk);
      });

      res.on('end', () => {
        resolve({
          statusCode: res.statusCode,
          headers: res.headers,
          body: Buffer.concat(chunks)
        });
      });

      res.on('error', (err) => {
        reject(err);
      });
    });

    // The `timeout` option only emits an event; the request must be
    // aborted explicitly or it stays open indefinitely.
    req.on('timeout', () => {
      req.destroy(new Error('Upstream request timed out after 30000ms'));
    });

    req.on('error', (err) => {
      reject(err);
    });

    if (payload) {
      req.write(payload);
    }

    req.end();
  });
}
260
+
261
+ function getDashboardHTML() {
262
+ return `<!DOCTYPE html>
263
+ <html lang="en">
264
+ <head>
265
+ <meta charset="UTF-8">
266
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
267
+ <title>HuggingClip - Paperclip on HF Spaces</title>
268
+ <style>
269
+ * {
270
+ margin: 0;
271
+ padding: 0;
272
+ box-sizing: border-box;
273
+ }
274
+
275
+ body {
276
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', 'Ubuntu', 'Cantarell', sans-serif;
277
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
278
+ min-height: 100vh;
279
+ padding: 20px;
280
+ }
281
+
282
+ .container {
283
+ max-width: 1200px;
284
+ margin: 0 auto;
285
+ }
286
+
287
+ .header {
288
+ text-align: center;
289
+ color: white;
290
+ margin-bottom: 40px;
291
+ animation: slideDown 0.6s ease-out;
292
+ }
293
+
294
+ @keyframes slideDown {
295
+ from {
296
+ opacity: 0;
297
+ transform: translateY(-20px);
298
+ }
299
+ to {
300
+ opacity: 1;
301
+ transform: translateY(0);
302
+ }
303
+ }
304
+
305
+ .header h1 {
306
+ font-size: 2.5em;
307
+ margin-bottom: 10px;
308
+ font-weight: 700;
309
+ }
310
+
311
+ .header p {
312
+ font-size: 1.1em;
313
+ opacity: 0.9;
314
+ }
315
+
316
+ .grid {
317
+ display: grid;
318
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
319
+ gap: 20px;
320
+ margin-bottom: 30px;
321
+ }
322
+
323
+ .card {
324
+ background: white;
325
+ border-radius: 12px;
326
+ padding: 24px;
327
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
328
+ animation: fadeIn 0.6s ease-out;
329
+ }
330
+
331
+ @keyframes fadeIn {
332
+ from {
333
+ opacity: 0;
334
+ transform: translateY(20px);
335
+ }
336
+ to {
337
+ opacity: 1;
338
+ transform: translateY(0);
339
+ }
340
+ }
341
+
342
+ .card:nth-child(1) { animation-delay: 0.1s; }
343
+ .card:nth-child(2) { animation-delay: 0.2s; }
344
+ .card:nth-child(3) { animation-delay: 0.3s; }
345
+ .card:nth-child(4) { animation-delay: 0.4s; }
346
+
347
+ .card h2 {
348
+ color: #333;
349
+ font-size: 1.3em;
350
+ margin-bottom: 16px;
351
+ display: flex;
352
+ align-items: center;
353
+ gap: 10px;
354
+ }
355
+
356
+ .status-indicator {
357
+ width: 12px;
358
+ height: 12px;
359
+ border-radius: 50%;
360
+ display: inline-block;
361
+ }
362
+
363
+ .status-indicator.running {
364
+ background-color: #4ade80;
365
+ box-shadow: 0 0 10px rgba(74, 222, 128, 0.5);
366
+ }
367
+
368
+ .status-indicator.stopped {
369
+ background-color: #ef4444;
370
+ }
371
+
372
+ .status-indicator.unknown {
373
+ background-color: #eab308;
374
+ }
375
+
376
+ .stat {
377
+ margin: 12px 0;
378
+ display: flex;
379
+ justify-content: space-between;
380
+ align-items: center;
381
+ padding: 8px 0;
382
+ border-bottom: 1px solid #f0f0f0;
383
+ }
384
+
385
+ .stat:last-child {
386
+ border-bottom: none;
387
+ }
388
+
389
+ .stat-label {
390
+ color: #666;
391
+ font-size: 0.95em;
392
+ }
393
+
394
+ .stat-value {
395
+ color: #333;
396
+ font-weight: 600;
397
+ font-size: 0.95em;
398
+ }
399
+
400
+ .button-group {
401
+ display: flex;
402
+ gap: 10px;
403
+ margin-top: 16px;
404
+ }
405
+
406
+ .button {
407
+ flex: 1;
408
+ padding: 10px 16px;
409
+ border: none;
410
+ border-radius: 6px;
411
+ font-size: 0.9em;
412
+ cursor: pointer;
413
+ transition: all 0.3s ease;
414
+ text-decoration: none;
415
+ display: inline-flex;
416
+ align-items: center;
417
+ justify-content: center;
418
+ gap: 6px;
419
+ }
420
+
421
+ .button-primary {
422
+ background: #667eea;
423
+ color: white;
424
+ }
425
+
426
+ .button-primary:hover {
427
+ background: #5568d3;
428
+ transform: translateY(-2px);
429
+ box-shadow: 0 5px 15px rgba(102, 126, 234, 0.4);
430
+ }
431
+
432
+ .button-secondary {
433
+ background: #f3f4f6;
434
+ color: #333;
435
+ border: 1px solid #e5e7eb;
436
+ }
437
+
438
+ .button-secondary:hover {
439
+ background: #e5e7eb;
440
+ }
441
+
442
+ .footer {
443
+ text-align: center;
444
+ color: white;
445
+ margin-top: 40px;
446
+ opacity: 0.8;
447
+ font-size: 0.9em;
448
+ }
449
+
450
+ .footer a {
451
+ color: white;
452
+ text-decoration: underline;
453
+ }
454
+
455
+ .error {
456
+ color: #dc2626;
457
+ font-size: 0.85em;
458
+ }
459
+
460
+ .success {
461
+ color: #16a34a;
462
+ font-size: 0.85em;
463
+ }
464
+
465
+ .pending {
466
+ color: #ea580c;
467
+ font-size: 0.85em;
468
+ }
469
+
470
+ .code {
471
+ background: #f3f4f6;
472
+ padding: 2px 6px;
473
+ border-radius: 3px;
474
+ font-family: 'Monaco', 'Courier New', monospace;
475
+ font-size: 0.85em;
476
+ }
477
+
478
+ .loading {
479
+ display: inline-block;
480
+ width: 8px;
481
+ height: 8px;
482
+ border-radius: 50%;
483
+ background: #667eea;
484
+ animation: pulse 1.5s infinite;
485
+ }
486
+
487
+ @keyframes pulse {
488
+ 0%, 100% { opacity: 1; }
489
+ 50% { opacity: 0.3; }
490
+ }
491
+ </style>
492
+ </head>
493
+ <body>
494
+ <div class="container">
495
+ <div class="header">
496
+ <h1>🔗 HuggingClip</h1>
497
+ <p>Paperclip AI Agent Orchestration on Hugging Face Spaces</p>
498
+ </div>
499
+
500
+ <div class="grid">
501
+ <!-- Paperclip Status Card -->
502
+ <div class="card">
503
+ <h2>
504
+ <span class="status-indicator running"></span>
505
+ Paperclip Service
506
+ </h2>
507
+ <div class="stat">
508
+ <span class="stat-label">Status</span>
509
+ <span class="stat-value" id="paperclip-status">
510
+ <span class="loading"></span> Checking...
511
+ </span>
512
+ </div>
513
+ <div class="stat">
514
+ <span class="stat-label">Port</span>
515
+ <span class="stat-value">3100</span>
516
+ </div>
517
+ <div class="stat">
518
+ <span class="stat-label">UI URL</span>
519
+ <span class="stat-value"><span class="code">/app/</span></span>
520
+ </div>
521
+ <div class="button-group">
522
+ <a href="/app/" class="button button-primary" target="_blank">Open Paperclip UI</a>
523
+ </div>
524
+ </div>
525
+
526
+ <!-- Database Status Card -->
527
+ <div class="card">
528
+ <h2>
529
+ <span class="status-indicator running"></span>
530
+ Database
531
+ </h2>
532
+ <div class="stat">
533
+ <span class="stat-label">Status</span>
534
+ <span class="stat-value" id="db-status">PostgreSQL</span>
535
+ </div>
536
+ <div class="stat">
537
+ <span class="stat-label">Location</span>
538
+ <span class="stat-value"><span class="code">/paperclip</span></span>
539
+ </div>
540
+ <div class="stat">
541
+ <span class="stat-label">Last Backup</span>
542
+ <span class="stat-value" id="last-backup">Never</span>
543
+ </div>
544
+ <div class="stat">
545
+ <span class="stat-label">Backup Status</span>
546
+ <span class="stat-value" id="backup-status">
547
+ <span class="loading"></span> Checking...
548
+ </span>
549
+ </div>
550
+ </div>
551
+
552
+ <!-- System Health Card -->
553
+ <div class="card">
554
+ <h2>
555
+ <span class="status-indicator running"></span>
556
+ System Health
557
+ </h2>
558
+ <div class="stat">
559
+ <span class="stat-label">Uptime</span>
560
+ <span class="stat-value" id="uptime">Calculating...</span>
561
+ </div>
562
+ <div class="stat">
563
+ <span class="stat-label">Start Time</span>
564
+ <span class="stat-value" id="start-time">Calculating...</span>
565
+ </div>
566
+ <div class="stat">
567
+ <span class="stat-label">Health Server</span>
568
+ <span class="stat-value success">Running</span>
569
+ </div>
570
+ <div class="stat">
571
+ <span class="stat-label">API Port</span>
572
+ <span class="stat-value"><span class="code">7861</span></span>
573
+ </div>
574
+ </div>
575
+
576
+ <!-- Quick Links Card -->
577
+ <div class="card">
578
+ <h2>📚 Resources</h2>
579
+ <div class="button-group" style="flex-direction: column;">
580
+ <a href="/app/" class="button button-primary" target="_blank">Paperclip Dashboard</a>
581
+ <a href="/api/" class="button button-secondary" target="_blank">API Reference</a>
582
+ </div>
583
+ <div class="stat" style="margin-top: 16px;">
584
+ <span class="stat-label">Documentation</span>
585
+ <span class="stat-value"><a href="https://docs.paperclip.ing" target="_blank" style="color: #667eea; text-decoration: underline;">paperclip.ing</a></span>
586
+ </div>
587
+ <div class="stat">
588
+ <span class="stat-label">GitHub</span>
589
+ <span class="stat-value"><a href="https://github.com/paperclipai/paperclip" target="_blank" style="color: #667eea; text-decoration: underline;">paperclipai/paperclip</a></span>
590
+ </div>
591
+ </div>
592
+ </div>
593
+
594
+ <div class="footer">
595
+ <p>HuggingClip v1.0 β€’ Running on Hugging Face Spaces</p>
596
+ <p style="margin-top: 10px; opacity: 0.6;">Last updated: <span id="footer-time">loading...</span></p>
597
+ </div>
598
+ </div>
599
+
600
+ <script>
601
+ // Update status every 5 seconds
602
+ async function updateStatus() {
603
+ try {
604
+ const response = await fetch('/health');
605
+ const data = await response.json();
606
+
607
+ // Update Paperclip status
608
+ const paperclipEl = document.getElementById('paperclip-status');
609
+ if (data.services.paperclip.status === 'running') {
610
+ paperclipEl.innerHTML = '<span class="success">Running ✓</span>';
611
+ } else {
612
+ paperclipEl.innerHTML = '<span class="error">Unreachable</span>';
613
+ }
614
+
615
+ // Update DB status
616
+ const dbEl = document.getElementById('db-status');
617
+ if (data.services.database.status === 'connected') {
618
+ dbEl.innerHTML = '<span class="success">Connected ✓</span>';
619
+ } else {
620
+ dbEl.innerHTML = '<span class="pending">PostgreSQL</span>';
621
+ }
622
+
623
+ // Update last backup
624
+ const lastBackupEl = document.getElementById('last-backup');
625
+ if (data.services.database.lastSync) {
626
+ const lastSync = new Date(data.services.database.lastSync).toLocaleString();
627
+ lastBackupEl.textContent = lastSync;
628
+ } else {
629
+ lastBackupEl.textContent = 'Never';
630
+ }
631
+
632
+ // Update backup status
633
+ const backupStatusEl = document.getElementById('backup-status');
634
+ if (data.backup.enabled) {
635
+ const errorMsg = data.services.database.lastSyncError;
636
+ if (errorMsg) {
637
+ backupStatusEl.innerHTML = '<span class="error">Error</span>';
638
+ } else {
639
+ backupStatusEl.innerHTML = '<span class="success">Enabled ✓</span>';
640
+ }
641
+ } else {
642
+ backupStatusEl.innerHTML = '<span class="pending">Disabled</span>';
643
+ }
644
+
645
+ // Update uptime
646
+ const uptimeEl = document.getElementById('uptime');
647
+ const uptime = data.uptime;
648
+ const hours = Math.floor(uptime / 3600);
649
+ const minutes = Math.floor((uptime % 3600) / 60);
650
+ const seconds = Math.floor(uptime % 60);
651
+ uptimeEl.textContent = \`\${hours}h \${minutes}m \${seconds}s\`;
652
+
653
+ // Update start time
654
+ const startTimeEl = document.getElementById('start-time');
655
+ const startTime = new Date(data.startTime).toLocaleString();
656
+ startTimeEl.textContent = startTime;
657
+
658
+ // Update footer time
659
+ document.getElementById('footer-time').textContent = new Date().toLocaleString();
660
+ } catch (error) {
661
+ console.error('Status update failed:', error);
662
+ }
663
+ }
664
+
665
+ // Initial update
666
+ updateStatus();
667
+
668
+ // Update every 5 seconds
669
+ setInterval(updateStatus, 5000);
670
+ </script>
671
+ </body>
672
+ </html>`;
673
+ }
674
+
675
+ // ============================================================================
676
+ // Start Server
677
+ // ============================================================================
678
// Bind on all interfaces and log the entry points once the server is up.
// The check marks below were mis-encoded (UTF-8 decoded as a single-byte
// code page) and are restored here.
app.listen(PORT, '0.0.0.0', () => {
  console.log(`✓ Health server listening on port ${PORT}`);
  console.log(`✓ Dashboard: http://localhost:${PORT}/`);
  console.log(`✓ API proxy: http://localhost:${PORT}/api/*`);
  console.log(`✓ App proxy: http://localhost:${PORT}/app/`);
});
paperclip-sync.py ADDED
@@ -0,0 +1,494 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ HuggingClip Database Sync - PostgreSQL Backup/Restore to Hugging Face Dataset
4
+ Syncs Paperclip's PostgreSQL database to HF Dataset for persistence across restarts.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import json
10
+ import tarfile
11
+ import tempfile
12
+ import subprocess
13
+ import logging
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+
17
+ from huggingface_hub import HfApi, HfFolder
18
+ from huggingface_hub.utils import RepositoryNotFoundError
19
+
20
+ # ============================================================================
21
+ # Configuration
22
+ # ============================================================================
23
+
24
+ # Logging setup
25
+ logging.basicConfig(
26
+ level=logging.INFO,
27
+ format='%(asctime)s - %(levelname)s - %(message)s'
28
+ )
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # Environment variables
32
+ HF_TOKEN = os.environ.get('HF_TOKEN')
33
+ HF_USERNAME = os.environ.get('HF_USERNAME')
34
+ DATABASE_URL = os.environ.get('DATABASE_URL', 'postgres://postgres:paperclip@localhost:5432/paperclip')
35
+ BACKUP_DATASET_NAME = os.environ.get('BACKUP_DATASET_NAME', 'paperclip-backup')
36
+ SYNC_MAX_FILE_BYTES = int(os.environ.get('SYNC_MAX_FILE_BYTES', '52428800')) # 50MB
37
+ PAPERCLIP_HOME = os.environ.get('PAPERCLIP_HOME', '/paperclip')
38
+
39
+ # Status file for dashboard
40
+ STATUS_FILE = Path('/tmp/sync-status.json')
41
+
42
+ # ============================================================================
43
+ # Helper Functions
44
+ # ============================================================================
45
+
46
def parse_db_url(db_url: str) -> dict:
    """Parse a PostgreSQL connection URL into its components.

    Accepts ``postgres://`` or ``postgresql://`` URLs of the form
    ``scheme://user:password@host:port/database`` and also scheme-less
    strings such as ``user:password@host/database``.

    Uses the standard URL parser so that passwords containing ``@`` or
    percent-encoded characters are handled and any ``?query`` suffix does
    not leak into the database name.

    Returns:
        A dict with string values for ``user``, ``password``, ``host``,
        ``port`` and ``database``. Missing parts default to ``postgres``,
        an empty password, ``localhost``, ``5432`` and ``paperclip``.
        Returns ``None`` if the URL cannot be parsed (for example a
        non-numeric port).
    """
    from urllib.parse import unquote, urlsplit

    try:
        # urlsplit only recognises the authority part after "//".
        if '://' not in db_url:
            db_url = 'postgres://' + db_url

        parts = urlsplit(db_url)

        # Keep the last path segment, as the previous parser did.
        database = unquote(parts.path.rstrip('/').rsplit('/', 1)[-1])

        return {
            'user': unquote(parts.username) if parts.username else 'postgres',
            'password': unquote(parts.password) if parts.password else '',
            'host': parts.hostname or 'localhost',
            # .port raises ValueError on a malformed port; handled below.
            'port': str(parts.port) if parts.port else '5432',
            'database': database or 'paperclip'
        }
    except Exception as e:
        logger.error(f'Failed to parse DATABASE_URL: {e}')
        return None
89
+
90
def write_status(status: dict):
    """Write sync status to the status file read by the dashboard.

    The JSON is written to a sibling temporary file and then renamed over
    the target, so a concurrent reader never sees a half-written file.
    Failures are logged and not raised.
    """
    try:
        tmp_file = STATUS_FILE.with_name(STATUS_FILE.name + '.tmp')
        tmp_file.write_text(json.dumps(status, indent=2))
        os.replace(tmp_file, STATUS_FILE)
    except Exception as e:
        logger.error(f'Failed to write status file: {e}')
96
+
97
def read_status() -> dict:
    """Return the persisted sync status.

    Falls back to a default "unknown" status when the status file does
    not exist or cannot be read or parsed (the failure is logged).
    """
    defaults = {
        'db_status': 'unknown',
        'last_sync_time': None,
        'last_error': None,
        'sync_count': 0
    }

    if not STATUS_FILE.exists():
        return defaults

    try:
        raw = STATUS_FILE.read_text()
        return json.loads(raw)
    except Exception as e:
        logger.error(f'Failed to read status file: {e}')
        return defaults
111
+
112
def backup_database() -> tuple[str, bool]:
    """
    Backup PostgreSQL database to a plain-text SQL dump using pg_dump.

    The dump is written to ``paperclip.sql`` inside a fresh temporary
    directory. On success the caller owns that directory. On any failure
    the directory is removed here so failed attempts do not pile up.

    Returns (filepath, success); filepath is None when success is False.
    """
    import shutil

    logger.info('Starting database backup...')

    db_config = parse_db_url(DATABASE_URL)
    if not db_config:
        return None, False

    temp_dir = tempfile.mkdtemp()
    dump_file = Path(temp_dir) / 'paperclip.sql'
    succeeded = False

    try:
        # pg_dump reads the password from PGPASSWORD
        env = os.environ.copy()
        if db_config['password']:
            env['PGPASSWORD'] = db_config['password']

        cmd = [
            'pg_dump',
            f'--host={db_config["host"]}',
            f'--port={db_config["port"]}',
            f'--username={db_config["user"]}',
            # Never prompt: fail immediately if no password is available,
            # matching the psql calls used for restore.
            '--no-password',
            '--format=plain',
            '--verbose',
            db_config['database']
        ]

        # Stream the dump straight to disk; keep stderr for diagnostics
        with open(dump_file, 'w') as f:
            result = subprocess.run(cmd, stdout=f, stderr=subprocess.PIPE, env=env, timeout=300)

        if result.returncode != 0:
            error_msg = result.stderr.decode('utf-8', errors='ignore')
            logger.error(f'pg_dump failed: {error_msg}')
            return None, False

        dump_size = dump_file.stat().st_size
        logger.info(f'Database dumped successfully ({dump_size / 1024 / 1024:.2f} MB)')

        succeeded = True
        return str(dump_file), True

    except subprocess.TimeoutExpired:
        logger.error('Database backup timed out (>300s)')
        return None, False
    except Exception as e:
        logger.error(f'Database backup error: {e}')
        return None, False
    finally:
        if not succeeded:
            shutil.rmtree(temp_dir, ignore_errors=True)
162
+
163
def create_backup_tarball(dump_file: str) -> tuple[str, bool]:
    """
    Create a gzip tarball containing the database dump (as
    ``paperclip.sql``) and, if it exists, the PAPERCLIP_HOME directory
    (as ``paperclip-data``).

    The tarball is written into a fresh temporary directory. On success
    the caller owns that directory. On failure, including when the
    archive exceeds SYNC_MAX_FILE_BYTES, the directory is removed here.

    Returns (tarball_path, success); tarball_path is None on failure.
    """
    import shutil

    logger.info('Creating backup tarball...')

    temp_dir = tempfile.mkdtemp()
    tarball_file = Path(temp_dir) / 'paperclip-backup.tar.gz'
    succeeded = False

    try:
        with tarfile.open(tarball_file, 'w:gz') as tar:
            # Add SQL dump
            tar.add(dump_file, arcname='paperclip.sql')

            # Add Paperclip home directory if it exists
            if Path(PAPERCLIP_HOME).exists():
                tar.add(PAPERCLIP_HOME, arcname='paperclip-data')
            else:
                logger.warning(f'PAPERCLIP_HOME not found: {PAPERCLIP_HOME}')

        tarball_size = tarball_file.stat().st_size
        logger.info(f'Backup tarball created ({tarball_size / 1024 / 1024:.2f} MB)')

        # Check size limit
        if tarball_size > SYNC_MAX_FILE_BYTES:
            logger.error(f'Backup too large ({tarball_size / SYNC_MAX_FILE_BYTES * 100:.0f}% of limit)')
            return None, False

        succeeded = True
        return str(tarball_file), True

    except Exception as e:
        logger.error(f'Failed to create tarball: {e}')
        return None, False
    finally:
        if not succeeded:
            shutil.rmtree(temp_dir, ignore_errors=True)
197
+
198
def sync_to_hf(backup_file: str) -> bool:
    """
    Upload backup tarball to Hugging Face Dataset.

    The target is ``<username>/<BACKUP_DATASET_NAME>``, where the
    username comes from HF_USERNAME or, if unset, from the token via
    ``whoami``. The private dataset repo is created if it does not exist,
    and the tarball is uploaded as ``snapshots/latest.tar.gz``.

    Returns True on success; False when HF_TOKEN is missing, the username
    cannot be determined, or the upload fails (the error is logged).
    """
    if not HF_TOKEN:
        logger.warning('HF_TOKEN not set - skipping backup upload')
        return False

    logger.info('Uploading backup to Hugging Face...')

    try:
        api = HfApi(token=HF_TOKEN)

        # Get username if not provided
        username = HF_USERNAME
        if not username:
            try:
                user_info = api.whoami()
                username = user_info['name']
            except Exception:
                logger.error('Failed to get HF username')
                return False

        dataset_id = f'{username}/{BACKUP_DATASET_NAME}'
        logger.info(f'Using dataset: {dataset_id}')

        # Ensure the dataset exists. The previous code only called
        # create_repo from an `except RepositoryNotFoundError` attached
        # to a log statement, which can never raise, so the repo was
        # never created and the first upload failed. exist_ok=True makes
        # this a no-op when the repo is already there.
        api.create_repo(repo_id=dataset_id, repo_type='dataset', private=True, exist_ok=True)

        # Upload file
        api.upload_file(
            path_or_fileobj=backup_file,
            path_in_repo='snapshots/latest.tar.gz',
            repo_id=dataset_id,
            repo_type='dataset'
        )

        logger.info(f'Backup uploaded to {dataset_id}')
        return True

    except Exception as e:
        logger.error(f'Failed to upload to HF: {e}')
        return False
244
+
245
def restore_database(restore_file: str) -> bool:
    """
    Restore PostgreSQL database from a plain-text SQL dump using psql.

    Creates the target database first if it does not exist, then feeds
    the dump to psql on stdin.

    Returns True when psql exits with status 0, False otherwise.

    NOTE(review): psql is run without ON_ERROR_STOP, so individual SQL
    statement errors inside the dump do not change the exit status.
    """
    logger.info('Restoring database from backup...')

    db_config = parse_db_url(DATABASE_URL)
    if not db_config:
        return False

    try:
        env = os.environ.copy()
        if db_config['password']:
            env['PGPASSWORD'] = db_config['password']

        base_cmd = [
            'psql',
            f'--host={db_config["host"]}',
            f'--port={db_config["port"]}',
            f'--username={db_config["user"]}',
            '--no-password'
        ]

        # Ensure database exists. PostgreSQL has no
        # "CREATE DATABASE IF NOT EXISTS", so that statement always
        # failed; look the database up in pg_database and create it only
        # when missing. Quotes are doubled so the name cannot break out
        # of the literal or identifier.
        database = db_config['database']
        db_literal = database.replace("'", "''")
        db_ident = database.replace('"', '""')

        exists = subprocess.run(
            base_cmd + [
                '--dbname=postgres',
                '-tAc',
                f"SELECT 1 FROM pg_database WHERE datname = '{db_literal}'"
            ],
            env=env,
            capture_output=True,
            timeout=300
        )

        if exists.returncode != 0:
            # Could not check; the restore below reports the real error.
            error_msg = exists.stderr.decode('utf-8', errors='ignore')
            logger.warning(f'Could not check whether database exists: {error_msg}')
        elif exists.stdout.strip() != b'1':
            created = subprocess.run(
                base_cmd + ['--dbname=postgres', '-c', f'CREATE DATABASE "{db_ident}"'],
                env=env,
                capture_output=True,
                timeout=300
            )
            if created.returncode != 0:
                error_msg = created.stderr.decode('utf-8', errors='ignore')
                logger.error(f'Failed to create database: {error_msg}')
                return False

        # Restore from dump
        restore_cmd = base_cmd + [database]

        with open(restore_file, 'r') as f:
            result = subprocess.run(
                restore_cmd,
                stdin=f,
                stderr=subprocess.PIPE,
                env=env,
                timeout=300
            )

        if result.returncode != 0:
            error_msg = result.stderr.decode('utf-8', errors='ignore')
            logger.error(f'Restore failed: {error_msg}')
            return False

        logger.info('Database restored successfully')
        return True

    except subprocess.TimeoutExpired:
        logger.error('Database restore timed out (>300s)')
        return False
    except Exception as e:
        logger.error(f'Database restore error: {e}')
        return False
306
+
307
def sync_from_hf() -> bool:
    """
    Download backup from Hugging Face and restore database.

    Fetches ``snapshots/latest.tar.gz`` from the
    ``<username>/<BACKUP_DATASET_NAME>`` dataset, extracts it into a
    temporary directory, restores ``paperclip.sql`` via
    restore_database(), and copies the contents of ``paperclip-data``
    (if present) into PAPERCLIP_HOME, replacing existing entries. The
    temporary directory is always removed afterwards.

    Returns the result of the database restore; False when HF_TOKEN is
    missing, no backup exists, or any step raises. A failure while
    copying data files is logged but does not change the return value.
    """
    import shutil

    # A missing file in an existing dataset is "no backup yet", not an
    # error. Fall back if this exception class is not exported.
    try:
        from huggingface_hub.utils import EntryNotFoundError
    except ImportError:
        EntryNotFoundError = RepositoryNotFoundError

    if not HF_TOKEN:
        logger.warning('HF_TOKEN not set - skipping restore')
        return False

    logger.info('Downloading backup from Hugging Face...')

    temp_dir = None
    try:
        api = HfApi(token=HF_TOKEN)

        # Get username
        username = HF_USERNAME
        if not username:
            try:
                user_info = api.whoami()
                username = user_info['name']
            except Exception:
                logger.warning('Failed to get HF username')
                return False

        dataset_id = f'{username}/{BACKUP_DATASET_NAME}'

        # Download latest backup
        temp_dir = tempfile.mkdtemp()

        try:
            snapshot_path = api.hf_hub_download(
                repo_id=dataset_id,
                repo_type='dataset',
                filename='snapshots/latest.tar.gz',
                local_dir=temp_dir,
                local_dir_use_symlinks=False
            )
        except (RepositoryNotFoundError, EntryNotFoundError):
            logger.info(f'No backup found in {dataset_id}')
            return False

        logger.info(f'Downloaded backup from {dataset_id}')

        # Extract tarball
        # NOTE(review): extractall trusts member paths in the archive;
        # acceptable only while the dataset is written solely by this tool.
        logger.info('Extracting backup...')
        with tarfile.open(snapshot_path, 'r:gz') as tar:
            tar.extractall(temp_dir)

        dump_file = Path(temp_dir) / 'paperclip.sql'
        if not dump_file.exists():
            logger.error('SQL dump not found in backup')
            return False

        # Restore database
        success = restore_database(str(dump_file))

        # Restore Paperclip data files if present
        paperclip_data_dir = Path(temp_dir) / 'paperclip-data'
        if paperclip_data_dir.exists():
            logger.info('Restoring Paperclip data files...')
            try:
                for item in paperclip_data_dir.iterdir():
                    target = Path(PAPERCLIP_HOME) / item.name
                    # Replace any existing entry of the same name
                    if target.exists():
                        if target.is_dir():
                            shutil.rmtree(target)
                        else:
                            target.unlink()
                    if item.is_dir():
                        shutil.copytree(item, target)
                    else:
                        shutil.copy2(item, target)
                logger.info('Data files restored')
            except Exception as e:
                logger.error(f'Failed to restore data files: {e}')

        return success

    except Exception as e:
        logger.error(f'Failed to restore from HF: {e}')
        return False
    finally:
        # The download and extracted copy are only needed during restore.
        if temp_dir:
            shutil.rmtree(temp_dir, ignore_errors=True)
383
+
384
+ # ============================================================================
385
+ # Main Sync Operations
386
+ # ============================================================================
387
+
388
def sync_to_backup() -> bool:
    """Full backup operation: dump DB → create tarball → upload to HF.

    Updates the status file after each run: ``last_sync_time`` (UTC,
    ISO-8601 with a trailing ``Z``), ``db_status``, ``last_error`` and
    ``sync_count``. The temporary directories holding the dump and the
    tarball are removed before returning, so repeated syncs do not fill
    the temp filesystem.

    Returns True only when all three steps succeed.
    """
    import shutil
    from datetime import timezone

    logger.info('=' * 60)
    logger.info('Starting backup operation')
    logger.info('=' * 60)

    status = read_status()
    scratch_dirs = []

    try:
        # Step 1: Backup database
        dump_file, success = backup_database()
        if dump_file:
            # The dump lives in its own mkdtemp() directory
            scratch_dirs.append(Path(dump_file).parent)
        if not success or not dump_file:
            status['last_error'] = 'Database backup failed'
            status['db_status'] = 'error'
            write_status(status)
            return False

        # Step 2: Create tarball
        tarball_file, success = create_backup_tarball(dump_file)
        if tarball_file:
            scratch_dirs.append(Path(tarball_file).parent)
        if not success or not tarball_file:
            status['last_error'] = 'Tarball creation failed'
            status['db_status'] = 'error'
            write_status(status)
            return False

        # Step 3: Upload to HF
        success = sync_to_hf(tarball_file)

        # Update status. Same text format as the deprecated
        # datetime.utcnow().isoformat() + 'Z'.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        status['last_sync_time'] = now_utc.isoformat() + 'Z'
        status['db_status'] = 'connected' if success else 'error'
        status['last_error'] = None if success else 'Upload failed'
        status['sync_count'] = status.get('sync_count', 0) + 1

        write_status(status)

        if success:
            logger.info('Backup operation completed successfully')
        else:
            logger.warning('Backup operation completed with errors')

        return success

    except Exception as e:
        logger.error(f'Backup operation failed: {e}')
        status['last_error'] = str(e)
        status['db_status'] = 'error'
        write_status(status)
        return False
    finally:
        for scratch_dir in scratch_dirs:
            shutil.rmtree(scratch_dir, ignore_errors=True)
437
+
438
def sync_from_backup() -> bool:
    """Full restore operation: download from HF → extract → restore DB.

    Delegates the work to sync_from_hf() and records the outcome
    (``db_status`` and ``last_error``) in the status file.

    Returns True when the restore succeeded, False otherwise.
    """
    banner = '=' * 60
    logger.info(banner)
    logger.info('Starting restore operation')
    logger.info(banner)

    status = read_status()

    try:
        restored = sync_from_hf()
    except Exception as e:
        logger.error(f'Restore operation failed: {e}')
        status['last_error'] = str(e)
        status['db_status'] = 'error'
        write_status(status)
        return False

    # Update status
    if restored:
        status['db_status'] = 'connected'
        status['last_error'] = None
    else:
        status['db_status'] = 'error'
        status['last_error'] = 'Restore failed'

    try:
        write_status(status)

        if restored:
            logger.info('Restore operation completed successfully')
        else:
            logger.warning('Restore operation completed (no backup or error)')
    except Exception as e:
        logger.error(f'Restore operation failed: {e}')
        status['last_error'] = str(e)
        status['db_status'] = 'error'
        write_status(status)
        return False

    return restored
468
+
469
+ # ============================================================================
470
+ # CLI
471
+ # ============================================================================
472
+
473
def main():
    """Command-line entry point.

    Expects one command in ``sys.argv[1]``: ``sync`` runs a backup and
    ``restore`` runs a restore. Exits with status 0 when the operation
    succeeds, and 1 on failure, on an unknown command, or when no
    command is given (after printing usage).
    """
    if len(sys.argv) < 2:
        usage_lines = (
            'Usage: python3 paperclip-sync.py <command>',
            'Commands:',
            ' sync - Backup database to HF Dataset',
            ' restore - Restore database from HF Dataset backup',
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    command = sys.argv[1]

    if command not in ('sync', 'restore'):
        print(f'Unknown command: {command}')
        sys.exit(1)

    operation = sync_to_backup if command == 'sync' else sync_from_backup
    succeeded = operation()
    sys.exit(0 if succeeded else 1)
492
+
493
+ if __name__ == '__main__':
494
+ main()
setup-uptimerobot.sh ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
set -euo pipefail

# Register an UptimeRobot HTTP(s) monitor for this Hugging Face Space.
#
# The script is idempotent: when a monitor already watches the Space's
# /health URL it exits successfully without changing anything; otherwise it
# creates a new monitor. Existing monitors are never modified.
#
# Requirements:
# - curl and jq available on PATH
# - UPTIMEROBOT_API_KEY: Main API key from UptimeRobot
# - SPACE_HOST or first CLI arg: your HF Space host, e.g. "user-space.hf.space"
#   (a leading http:// or https:// and any path are stripped)
#
# Optional:
# - UPTIMEROBOT_MONITOR_NAME: friendly name for the monitor
# - UPTIMEROBOT_ALERT_CONTACTS: dash-separated alert contact IDs, e.g. "123456-789012"
# - UPTIMEROBOT_INTERVAL: monitoring interval in minutes (subject to account
#   limits); converted to seconds before being sent, because the API's
#   `interval` parameter is expressed in seconds
#
# Exit status: 0 if the monitor exists or was created, 1 on any error.
#
# NOTE(review): the API key is passed to curl on the command line and is
# therefore visible in the process list while the request runs.

API_URL="https://api.uptimerobot.com/v2"
API_KEY="${UPTIMEROBOT_API_KEY:-}"
SPACE_HOST_INPUT="${1:-${SPACE_HOST:-}}"

# Print each argument on its own line to stderr, then abort with status 1.
die() {
  printf '%s\n' "$@" >&2
  exit 1
}

if [ -z "$API_KEY" ]; then
  die "Missing UPTIMEROBOT_API_KEY." \
    "Use the Main API key from UptimeRobot -> Integrations." \
    "Do not use the Read-only API key or a Monitor-specific API key."
fi

if [ -z "$SPACE_HOST_INPUT" ]; then
  die "Missing Space host." \
    "Usage: UPTIMEROBOT_API_KEY=... ./setup-uptimerobot.sh your-space.hf.space"
fi

for required_tool in curl jq; do
  command -v "$required_tool" >/dev/null 2>&1 \
    || die "Required tool not found on PATH: ${required_tool}"
done

# Reduce the input to a bare host name: drop the scheme, then everything from
# the first "/" onwards.
SPACE_HOST_CLEAN="${SPACE_HOST_INPUT#https://}"
SPACE_HOST_CLEAN="${SPACE_HOST_CLEAN#http://}"
SPACE_HOST_CLEAN="${SPACE_HOST_CLEAN%%/*}"

if [ -z "$SPACE_HOST_CLEAN" ]; then
  die "Could not extract a host name from '${SPACE_HOST_INPUT}'."
fi

MONITOR_URL="https://${SPACE_HOST_CLEAN}/health"
MONITOR_NAME="${UPTIMEROBOT_MONITOR_NAME:-HuggingClip ${SPACE_HOST_CLEAN}}"
INTERVAL="${UPTIMEROBOT_INTERVAL:-5}"

# The interval is documented in minutes; reject anything that is not a
# positive whole number before doing arithmetic on it.
case "$INTERVAL" in
  '' | *[!0-9]*)
    die "UPTIMEROBOT_INTERVAL must be a whole number of minutes, got '${INTERVAL}'."
    ;;
esac
# 10# forces base 10 so values such as "08" are not parsed as octal.
INTERVAL_SECONDS=$((10#$INTERVAL * 60))
if [ "$INTERVAL_SECONDS" -le 0 ]; then
  die "UPTIMEROBOT_INTERVAL must be greater than zero."
fi

echo "Checking existing UptimeRobot monitors for ${MONITOR_URL}..."
# `search` narrows the result server-side so the match is not lost on accounts
# whose monitor list spans more than one page; the exact URL comparison is
# still done locally with jq below.
MONITORS_RESPONSE=$(curl -sS -X POST "${API_URL}/getMonitors" \
  --data-urlencode "api_key=${API_KEY}" \
  -d "format=json" \
  -d "logs=0" \
  -d "response_times=0" \
  -d "response_times_limit=1" \
  --data-urlencode "search=${SPACE_HOST_CLEAN}")

LOOKUP_STATUS=$(printf '%s' "$MONITORS_RESPONSE" | jq -r '.stat // "fail"')
if [ "$LOOKUP_STATUS" != "ok" ]; then
  # Surface the problem but keep going: the create call below reports its own
  # error if the key or account is really unusable.
  echo "Warning: could not list existing monitors:" >&2
  printf '%s\n' "$MONITORS_RESPONSE" >&2
fi

MONITOR_ID=$(printf '%s' "$MONITORS_RESPONSE" | jq -r --arg url "$MONITOR_URL" '
  (.monitors // []) | map(select(.url == $url)) | first | .id // empty
')

if [ -n "$MONITOR_ID" ]; then
  echo "Monitor already exists (id=${MONITOR_ID}) for ${MONITOR_URL}"
  exit 0
fi

echo "Creating new UptimeRobot monitor for ${MONITOR_URL}..."

# --data-urlencode is used for every free-form value so that spaces, "&" and
# similar characters in names or keys cannot corrupt the form body.
CURL_ARGS=(
  -sS
  -X POST "${API_URL}/newMonitor"
  --data-urlencode "api_key=${API_KEY}"
  -d "format=json"
  -d "type=1"
  --data-urlencode "friendly_name=${MONITOR_NAME}"
  --data-urlencode "url=${MONITOR_URL}"
  -d "interval=${INTERVAL_SECONDS}"
)

if [ -n "${UPTIMEROBOT_ALERT_CONTACTS:-}" ]; then
  CURL_ARGS+=(--data-urlencode "alert_contacts=${UPTIMEROBOT_ALERT_CONTACTS}")
fi

CREATE_RESPONSE=$(curl "${CURL_ARGS[@]}")
CREATE_STATUS=$(printf '%s' "$CREATE_RESPONSE" | jq -r '.stat // "fail"')

if [ "$CREATE_STATUS" != "ok" ]; then
  die "Failed to create monitor." "$CREATE_RESPONSE"
fi

NEW_ID=$(printf '%s' "$CREATE_RESPONSE" | jq -r '.monitor.id // empty')
echo "Created UptimeRobot monitor ${NEW_ID:-"(id unavailable)"} for ${MONITOR_URL}"
start.sh ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# HuggingClip container entrypoint.
#
# Startup sequence:
#   1. validate environment variables and apply defaults
#   2. start PostgreSQL and make sure the "paperclip" database exists
#   3. restore the database from the HF Dataset backup (needs HF_TOKEN)
#   4. optionally set up the Cloudflare proxy
#   5. start the periodic backup loop (needs HF_TOKEN)
#   6. start the health server
#   7. launch Paperclip
#   8. on SIGTERM/SIGINT: stop Paperclip, take a final backup, stop helpers
#
# Optional tunable introduced here:
#   POSTGRES_START_TIMEOUT  seconds to wait for PostgreSQL (default 60)
#
# pipefail is required so that "pnpm install | tail" reports pnpm's failure
# and the npm fallback can actually run.
set -eo pipefail

umask 0077

# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Banner
echo -e "${BLUE}"
cat << 'EOF'
___ ___ _____ _ _
/ _ \/ _ \___ __________/ ___/| (_)____
/ ___/ ___/ _ `/ ___/ ___/\__ \ | | / __ \
/ / / / / /_/ / / / /__/__/ / | | / /_/ /
\_/ \_/ \__,_/_/ \___/____/ |_|_/ .___/
/_/
EOF
echo -e "${NC}${GREEN}Starting HuggingClip (Paperclip on HF Spaces)${NC}\n"

# ============================================================================
# 1. Validate Environment Variables
# ============================================================================
echo -e "${BLUE}[1/8] Validating environment variables...${NC}"

REQUIRED_VARS=("HF_TOKEN")
MISSING_VARS=()

for var in "${REQUIRED_VARS[@]}"; do
  if [ -z "${!var:-}" ]; then
    MISSING_VARS+=("$var")
  fi
done

# A missing token is not fatal: the app still runs, only backup/restore is off.
if [ ${#MISSING_VARS[@]} -gt 0 ]; then
  echo -e "${YELLOW}Warning: Missing env vars: ${MISSING_VARS[*]}${NC}"
  echo -e "${YELLOW}Backup to HF Dataset will be disabled${NC}"
  SYNC_DISABLED=true
else
  SYNC_DISABLED=false
fi

# Default values (exported once here; every child process inherits them)
export DATABASE_URL="${DATABASE_URL:-postgres://postgres:paperclip@localhost:5432/paperclip}"
export PORT="${PORT:-3100}"
export SERVE_UI="${SERVE_UI:-true}"
export NODE_ENV="${NODE_ENV:-production}"
export HOST="${HOST:-0.0.0.0}"
export PAPERCLIP_HOME="${PAPERCLIP_HOME:-/paperclip}"
export PAPERCLIP_DEPLOYMENT_MODE="${PAPERCLIP_DEPLOYMENT_MODE:-authenticated}"
export SYNC_INTERVAL="${SYNC_INTERVAL:-180}"
export SYNC_MAX_FILE_BYTES="${SYNC_MAX_FILE_BYTES:-52428800}"
export BACKUP_DATASET_NAME="${BACKUP_DATASET_NAME:-paperclip-backup}"
export PAPERCLIP_TELEMETRY_DISABLED="${PAPERCLIP_TELEMETRY_DISABLED:-1}"
export DO_NOT_TRACK="${DO_NOT_TRACK:-1}"

echo -e "${GREEN}✓ Environment validated${NC}\n"

# PIDs of background helpers; filled in as each one is started so the
# shutdown code can tell which of them exist.
POSTGRES_PID=""
SYNC_PID=""
HEALTH_PID=""
APP_PID=""

# ============================================================================
# 2. Initialize PostgreSQL
# ============================================================================
echo -e "${BLUE}[2/8] Setting up PostgreSQL database...${NC}"

# Start PostgreSQL if not running
if ! pgrep -x "postgres" > /dev/null; then
  echo "Starting PostgreSQL daemon..."
  su - postgres -c "/usr/lib/postgresql/*/bin/postgres -D /var/lib/postgresql/data" &
  POSTGRES_PID=$!

  # Poll until the server accepts connections, but give up if the launcher
  # process has died or the timeout expires instead of looping forever.
  POSTGRES_START_TIMEOUT="${POSTGRES_START_TIMEOUT:-60}"
  pg_waited=0
  until pg_isready -h localhost -U postgres 2>/dev/null; do
    if ! kill -0 "$POSTGRES_PID" 2>/dev/null; then
      echo -e "${RED}PostgreSQL exited during startup${NC}" >&2
      exit 1
    fi
    if [ "$pg_waited" -ge "$POSTGRES_START_TIMEOUT" ]; then
      echo -e "${RED}PostgreSQL not ready after ${POSTGRES_START_TIMEOUT}s${NC}" >&2
      exit 1
    fi
    sleep 1
    pg_waited=$((pg_waited + 1))
  done
fi

# Create the database role and database if they don't exist.
# NOTE(review): the role checked and created here is "postgres" itself, the
# same role psql connects as, so the CREATE USER branch looks unreachable in
# practice - confirm whether a dedicated "paperclip" role was intended.
if ! psql -U postgres -tc "SELECT 1 FROM pg_user WHERE usename = 'postgres'" | grep -q 1; then
  psql -U postgres -c "CREATE USER postgres WITH PASSWORD 'paperclip' CREATEDB;" 2>/dev/null || true
fi

if ! psql -U postgres -tc "SELECT 1 FROM pg_database WHERE datname = 'paperclip'" | grep -q 1; then
  psql -U postgres -c "CREATE DATABASE paperclip OWNER postgres;" 2>/dev/null \
    || echo -e "${YELLOW}Warning: could not create database 'paperclip'${NC}" >&2
fi

echo -e "${GREEN}✓ PostgreSQL ready${NC}\n"

# ============================================================================
# 3. Restore from HF Dataset Backup
# ============================================================================
echo -e "${BLUE}[3/8] Restoring database from HF Dataset backup...${NC}"

if [ "$SYNC_DISABLED" = false ]; then
  # Best effort: a failed or missing backup must not block startup.
  python3 /app/paperclip-sync.py restore 2>&1 || true
  echo -e "${GREEN}✓ Restore attempt completed${NC}\n"
else
  echo -e "${YELLOW}Skipping restore (no HF_TOKEN)${NC}\n"
fi

# ============================================================================
# 4. Setup Cloudflare Proxy (if token provided)
# ============================================================================
if [ -n "${CLOUDFLARE_WORKERS_TOKEN:-}" ] && [ -n "${CLOUDFLARE_ACCOUNT_ID:-}" ]; then
  echo -e "${BLUE}[4/8] Setting up Cloudflare proxy...${NC}"
  python3 /app/cloudflare-proxy-setup.py 2>&1 || echo -e "${YELLOW}Cloudflare setup failed, continuing without proxy${NC}"
  echo ""
else
  echo -e "${BLUE}[4/8] Cloudflare proxy (skipped - no credentials)${NC}\n"
fi

# ============================================================================
# 5. Start Background Sync Loop
# ============================================================================
echo -e "${BLUE}[5/8] Starting database sync loop...${NC}"

if [ "$SYNC_DISABLED" = false ]; then
  # Start sync in background: sleep first, so the first backup happens one
  # full interval after startup.
  (
    while true; do
      sleep "$SYNC_INTERVAL"
      python3 /app/paperclip-sync.py sync 2>&1 || true
    done
  ) &
  SYNC_PID=$!
  echo -e "${GREEN}✓ Sync loop started (PID: $SYNC_PID)${NC}\n"
else
  echo -e "${YELLOW}Sync disabled (no HF_TOKEN)${NC}\n"
fi

# ============================================================================
# 6. Start Health Server
# ============================================================================
echo -e "${BLUE}[6/8] Starting health server on port 7861...${NC}"

# Load Cloudflare proxy if available. NODE_OPTIONS is exported, so it applies
# to the health server and to Paperclip alike.
if [ -f /app/cloudflare-proxy.js ]; then
  export NODE_OPTIONS="--require /app/cloudflare-proxy.js"
fi

node /app/health-server.js &
HEALTH_PID=$!
echo -e "${GREEN}✓ Health server started (PID: $HEALTH_PID)${NC}\n"

# Wait for health server to start
sleep 2

# ============================================================================
# 7. Launch Paperclip
# ============================================================================
echo -e "${BLUE}[7/8] Launching Paperclip application...${NC}"

cd /app/paperclip

# Install Paperclip dependencies if needed. With pipefail active the npm
# fallback runs when pnpm fails; if both fail, set -e aborts startup.
if [ ! -d "node_modules" ]; then
  echo "Installing Paperclip dependencies..."
  pnpm install 2>&1 | tail -5 || npm install 2>&1 | tail -5
fi

echo -e "${GREEN}✓ All systems ready${NC}"
echo -e "${GREEN}═══════════════════════════════════════════${NC}"
echo -e " Health Dashboard: http://localhost:7861/"
echo -e " Paperclip UI: http://localhost:7861/app/"
echo -e " API Endpoint: http://localhost:7861/api/*"
echo -e "${GREEN}═══════════════════════════════════════════${NC}\n"

# ============================================================================
# 8. Graceful Shutdown Handler
# ============================================================================

# Send SIGTERM to a background process and wait up to $2 seconds (default 10)
# for it to disappear. Does nothing for an empty or already-dead PID.
stop_process() {
  local pid="$1"
  local grace="${2:-10}"
  local waited=0

  [ -n "$pid" ] || return 0
  kill "$pid" 2>/dev/null || return 0

  while [ "$waited" -lt "$grace" ] && kill -0 "$pid" 2>/dev/null; do
    sleep 1
    waited=$((waited + 1))
  done
  return 0
}

# Stop the periodic sync loop and Paperclip, take one final backup, then stop
# the health server. The loop and the app are stopped first so the final
# backup neither overlaps a periodic one nor misses late writes.
shutdown_services() {
  if [ -n "$SYNC_PID" ]; then
    # Kill the loop's current child (sleep or a running sync) as well.
    pkill -P "$SYNC_PID" 2>/dev/null || true
    stop_process "$SYNC_PID" 2
  fi

  stop_process "$APP_PID"

  if [ "$SYNC_DISABLED" = false ]; then
    echo "Syncing data to HF Dataset..."
    python3 /app/paperclip-sync.py sync 2>&1 || true
  fi

  echo "Stopping services..."
  stop_process "$HEALTH_PID" 2
}

# Signal handler for SIGTERM/SIGINT: shut everything down and exit 0.
cleanup() {
  echo -e "\n${YELLOW}[SHUTDOWN] Received termination signal...${NC}"
  shutdown_services
  echo -e "${GREEN}Shutdown complete${NC}"
  exit 0
}

trap cleanup SIGTERM SIGINT

# Run Paperclip as a child instead of exec'ing it: exec would replace this
# shell, discarding the trap above, so the final backup would never run.
node server.js &
APP_PID=$!

# `wait` returns as soon as a trapped signal arrives, after which cleanup()
# runs and exits. Falling through therefore means Paperclip ended by itself.
APP_STATUS=0
wait "$APP_PID" || APP_STATUS=$?

echo -e "${YELLOW}Paperclip exited with status ${APP_STATUS}${NC}"
shutdown_services
exit "$APP_STATUS"