NitinBot001 committed on
Commit
c125555
·
verified ·
1 Parent(s): ea41682

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +8 -154
Dockerfile CHANGED
@@ -1,20 +1,15 @@
1
- # Dockerfile for Hugging Face Spaces - Non-root setup
2
  FROM node:18-slim
3
 
4
  # Create non-root user
5
- RUN groupadd -r firecrawl && useradd -r -g firecrawl firecrawl
6
 
7
- # Install system dependencies as root
8
  RUN apt-get update && apt-get install -y \
9
  git \
10
  curl \
11
- wget \
12
  bash \
13
  ca-certificates \
14
- redis-server \
15
- postgresql \
16
- postgresql-contrib \
17
- sudo \
18
  && rm -rf /var/lib/apt/lists/*
19
 
20
  # Create app directory and set permissions
@@ -25,153 +20,12 @@ RUN mkdir -p /home/firecrawl/app && \
25
  USER firecrawl
26
  WORKDIR /home/firecrawl/app
27
 
28
- # Clone Firecrawl repository
29
- RUN git clone https://github.com/mendableai/firecrawl.git .
30
-
31
- # Install Node.js dependencies for API
32
- WORKDIR /home/firecrawl/app/apps/api
33
- RUN npm ci --only=production
34
-
35
- # Install Node.js dependencies for Playwright service (lightweight version)
36
- WORKDIR /home/firecrawl/app/apps/playwright-service-ts
37
- RUN npm ci --only=production
38
-
39
- # Build the applications
40
- WORKDIR /home/firecrawl/app/apps/api
41
- RUN npm run build
42
-
43
- WORKDIR /home/firecrawl/app/apps/playwright-service-ts
44
- RUN npm run build
45
-
46
- # Create directories for runtime
47
- WORKDIR /home/firecrawl/app
48
- RUN mkdir -p logs tmp data redis-data postgres-data
49
-
50
- # Copy environment template and create .env
51
- RUN cp apps/api/.env.example .env
52
-
53
- # Create a simple startup script for HF Spaces
54
- RUN cat > start.sh << 'EOF' && chmod +x start.sh
55
- #!/bin/bash
56
-
57
- # Set environment variables for HF Spaces
58
- export PORT=7860
59
- export HOST=0.0.0.0
60
- export REDIS_URL=redis://localhost:6379
61
- export PLAYWRIGHT_MICROSERVICE_URL=http://localhost:3000/scrape
62
- export USE_DB_AUTHENTICATION=false
63
- export ENV=production
64
- export LOGGING_LEVEL=info
65
-
66
- # Start Redis in background (if we can)
67
- redis-server --daemonize yes --port 6379 --bind 127.0.0.1 --dir /home/firecrawl/app/redis-data || echo "Redis start failed, continuing..."
68
-
69
- # Start Playwright service in background
70
- cd /home/firecrawl/app/apps/playwright-service-ts
71
- npm start &
72
- PLAYWRIGHT_PID=$!
73
-
74
- # Wait a bit for Playwright to start
75
- sleep 5
76
-
77
- # Start API service
78
- cd /home/firecrawl/app/apps/api
79
- exec node dist/src/harness.js --start-docker
80
- EOF
81
-
82
- # Create a simple health check script
83
- RUN cat > health.sh << 'EOF' && chmod +x health.sh
84
- #!/bin/bash
85
- curl -f http://localhost:${PORT:-7860}/health 2>/dev/null || exit 1
86
- EOF
87
-
88
- # Create minimal configuration files
89
- RUN cat > /home/firecrawl/app/.env << 'EOF'
90
- # HF Spaces Configuration
91
- PORT=7860
92
- HOST=0.0.0.0
93
- ENV=production
94
- LOGGING_LEVEL=info
95
-
96
- # Disable database authentication for simplicity
97
- USE_DB_AUTHENTICATION=false
98
-
99
- # Redis (will try to use local instance)
100
- REDIS_URL=redis://localhost:6379
101
- REDIS_RATE_LIMIT_URL=redis://localhost:6379
102
-
103
- # Playwright service
104
- PLAYWRIGHT_MICROSERVICE_URL=http://localhost:3000/scrape
105
- BLOCK_MEDIA=true
106
-
107
- # Disable optional services that might not work in HF Spaces
108
- BULL_AUTH_KEY=fc-demo
109
- TEST_API_KEY=fc-demo-key
110
-
111
- # Model configuration (can be overridden with HF Spaces secrets)
112
- MODEL_NAME=gpt-3.5-turbo
113
- MODEL_EMBEDDING_NAME=text-embedding-ada-002
114
- EOF
115
-
116
- # Create a simple API-only version for HF Spaces constraints
117
- RUN cat > start-api-only.sh << 'EOF' && chmod +x start-api-only.sh
118
- #!/bin/bash
119
-
120
- # Simplified startup for HF Spaces - API only
121
- export PORT=${PORT:-7860}
122
- export HOST=0.0.0.0
123
- export ENV=production
124
- export LOGGING_LEVEL=info
125
- export USE_DB_AUTHENTICATION=false
126
-
127
- # Disable services that might not work
128
- export PLAYWRIGHT_MICROSERVICE_URL=http://localhost:3000/scrape
129
- export REDIS_URL=redis://localhost:6379
130
-
131
- echo "🔥 Starting Firecrawl API on port $PORT"
132
- echo "⚠️ Note: This is a simplified version for Hugging Face Spaces"
133
- echo "📝 Some features may be limited due to platform constraints"
134
-
135
- cd /home/firecrawl/app/apps/api
136
-
137
- # Create a simple in-memory fallback if Redis isn't available
138
- cat > dist/src/lib/redis-fallback.js << 'JSEOF'
139
- // Simple in-memory fallback for Redis when not available
140
- class MemoryStore {
141
- constructor() {
142
- this.store = new Map();
143
- }
144
-
145
- async get(key) {
146
- return this.store.get(key);
147
- }
148
-
149
- async set(key, value, options = {}) {
150
- this.store.set(key, value);
151
- if (options.EX) {
152
- setTimeout(() => this.store.delete(key), options.EX * 1000);
153
- }
154
- return 'OK';
155
- }
156
-
157
- async del(key) {
158
- return this.store.delete(key);
159
- }
160
- }
161
-
162
- module.exports = { MemoryStore };
163
- JSEOF
164
-
165
- # Start the API service
166
- exec node dist/src/harness.js --start-docker
167
- EOF
168
 
169
  # Expose the port that HF Spaces expects
170
  EXPOSE 7860
171
 
172
- # Health check
173
- HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
174
- CMD ./health.sh
175
-
176
- # Default command - use the API-only version for HF Spaces
177
- CMD ["./start-api-only.sh"]
 
1
+ # Minimal Dockerfile for Hugging Face Spaces - Non-root setup
2
  FROM node:18-slim
3
 
4
  # Create non-root user
5
+ RUN groupadd -r firecrawl && useradd -r -g firecrawl -m firecrawl
6
 
7
+ # Install only essential system dependencies
8
  RUN apt-get update && apt-get install -y \
9
  git \
10
  curl \
 
11
  bash \
12
  ca-certificates \
 
 
 
 
13
  && rm -rf /var/lib/apt/lists/*
14
 
15
  # Create app directory and set permissions
 
20
  USER firecrawl
21
  WORKDIR /home/firecrawl/app
22
 
23
+ # Copy the setup script
24
+ COPY --chown=firecrawl:firecrawl setup-firecrawl.sh /home/firecrawl/app/
25
+ RUN chmod +x setup-firecrawl.sh
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Expose the port that HF Spaces expects
28
  EXPOSE 7860
29
 
30
+ # Run the setup script which handles everything
31
+ CMD ["./setup-firecrawl.sh"]