Death fuck committed on
Commit
ce83c64
·
1 Parent(s): c15e636
Files changed (9) hide show
  1. .gitignore +1 -1
  2. .replit +9 -5
  3. README.md +13 -130
  4. app.py +278 -263
  5. document_scanner.py +4 -3
  6. hf_client.py +0 -391
  7. my_ssh_key.txt +0 -0
  8. requirements.txt +0 -6
  9. templates/index.html +114 -233
.gitignore CHANGED
@@ -19,7 +19,7 @@ wheels/
19
  *.egg-info/
20
  .installed.cfg
21
  *.egg
22
- *.zip
23
  # Virtual environments
24
  .env
25
  .venv
 
19
  *.egg-info/
20
  .installed.cfg
21
  *.egg
22
+
23
  # Virtual environments
24
  .env
25
  .venv
.replit CHANGED
@@ -4,7 +4,7 @@ expertMode = true
4
 
5
  [nix]
6
  channel = "stable-25_05"
7
- packages = ["freetype", "gut", "lcms2", "libimagequant", "libjpeg", "libjpeg_turbo", "libpng", "libtiff", "libwebp", "libxcrypt", "oneDNN", "openjpeg", "re2", "tcl", "tk", "which", "zlib", "python313Packages.huggingface-hub"]
8
 
9
  [workflows]
10
  runButton = "Project"
@@ -22,18 +22,22 @@ args = "AI Image Enhancer"
22
  name = "AI Image Enhancer"
23
  author = "agent"
24
 
25
- [workflows.workflow.metadata]
26
- outputType = "webview"
27
-
28
  [[workflows.workflow.tasks]]
29
  task = "shell.exec"
30
- args = "python -m uvicorn app:app --host 0.0.0.0 --port 5000"
31
  waitForPort = 5000
32
 
 
 
 
33
  [[ports]]
34
  localPort = 5000
35
  externalPort = 80
36
 
 
 
 
 
37
  [[ports]]
38
  localPort = 39671
39
  externalPort = 3002
 
4
 
5
  [nix]
6
  channel = "stable-25_05"
7
+ packages = ["freetype", "lcms2", "libimagequant", "libjpeg_turbo", "libpng", "libtiff", "libwebp", "libxcrypt", "tcl", "tk", "which", "zlib", "gut"]
8
 
9
  [workflows]
10
  runButton = "Project"
 
22
  name = "AI Image Enhancer"
23
  author = "agent"
24
 
 
 
 
25
  [[workflows.workflow.tasks]]
26
  task = "shell.exec"
27
+ args = "python app_local.py"
28
  waitForPort = 5000
29
 
30
+ [workflows.workflow.metadata]
31
+ outputType = "webview"
32
+
33
  [[ports]]
34
  localPort = 5000
35
  externalPort = 80
36
 
37
+ [[ports]]
38
+ localPort = 38887
39
+ externalPort = 3000
40
+
41
  [[ports]]
42
  localPort = 39671
43
  externalPort = 3002
README.md CHANGED
@@ -10,69 +10,34 @@ license: mit
10
 
11
  # AI Image Processing API
12
 
13
- A comprehensive image processing API powered by HuggingFace Inference API with multiple AI features including text-to-image generation, super-resolution, background removal, noise reduction, and document scanning.
14
 
15
  ## Features
16
 
17
- - **Image Generation**: Create images from text prompts using Stable Diffusion XL
18
- - **Image Enhancement**: Upscale images 2x or 4x using SD x4 Upscaler
19
- - **Background Removal**: Remove backgrounds using RMBG-1.4 AI model
20
  - **Noise Reduction**: Reduce image noise using OpenCV Non-Local Means Denoising
21
  - **Document Scanning**: Auto-crop, align, and enhance document photos with AI
22
- - **Async Processing**: All endpoints support async mode with progress tracking
23
  - **RESTful API**: Full API with automatic OpenAPI/Swagger documentation
24
  - **Web Interface**: Simple drag-and-drop interface for testing
25
 
26
  ## API Endpoints
27
 
28
- ### Image Generation
29
- #### `POST /generate`
30
- Generate images from text prompts using Stable Diffusion XL.
31
-
32
- **Parameters:**
33
- - `prompt`: Text description of the image to generate (required)
34
- - `negative_prompt`: What to avoid in the image (optional)
35
- - `width`: Image width 512-1024 (default: 1024)
36
- - `height`: Image height 512-1024 (default: 1024)
37
- - `guidance_scale`: Prompt adherence 1-20 (default: 7.5)
38
- - `steps`: Inference steps 20-100 (default: 50)
39
- - `async_mode`: Use async mode with progress tracking (default: false)
40
-
41
- #### `POST /generate/async`
42
- Start async image generation with progress tracking.
43
-
44
- #### `POST /generate/base64`
45
- Generate an image and return as base64-encoded string.
46
-
47
  ### Image Enhancement
48
  #### `POST /enhance`
49
- Upscale and enhance image quality using SD x4 Upscaler via HuggingFace.
50
 
51
  **Parameters:**
52
  - `file`: Image file (PNG, JPG, JPEG, WebP, BMP)
53
  - `scale`: Upscale factor (2 or 4, default: 4)
54
- - `async_mode`: Use async mode with progress tracking (default: false)
55
-
56
- #### `POST /enhance/async`
57
- Start async image enhancement with progress tracking.
58
-
59
- #### `POST /enhance/base64`
60
- Enhance an image and return as base64-encoded string.
61
 
62
  ### Background Removal
63
  #### `POST /remove-background`
64
- Remove background from an image using RMBG-1.4 via HuggingFace.
65
 
66
  **Parameters:**
67
  - `file`: Image file
68
  - `bgcolor`: Background color - 'transparent', 'white', 'black', or hex color like '#FF0000'
69
- - `async_mode`: Use async mode with progress tracking (default: false)
70
-
71
- #### `POST /remove-background/async`
72
- Start async background removal with progress tracking.
73
-
74
- #### `POST /remove-background/base64`
75
- Remove background and return as base64-encoded string.
76
 
77
  ### Noise Reduction
78
  #### `POST /denoise`
@@ -81,13 +46,6 @@ Reduce image noise using Non-Local Means Denoising.
81
  **Parameters:**
82
  - `file`: Image file
83
  - `strength`: Denoising strength (1-30, default: 10)
84
- - `async_mode`: Use async mode with progress tracking (default: false)
85
-
86
- #### `POST /denoise/async`
87
- Start async denoising with progress tracking.
88
-
89
- #### `POST /denoise/base64`
90
- Denoise an image and return as base64-encoded string.
91
 
92
  ### Document Scanning
93
  #### `POST /docscan`
@@ -100,50 +58,31 @@ Scan and enhance document images with AI-powered processing.
100
  - CLAHE contrast enhancement
101
  - Bilateral noise reduction (preserves edges)
102
  - Unsharp mask sharpening
103
- - Optional HD upscaling with HuggingFace SD Upscaler
104
 
105
  **Parameters:**
106
  - `file`: Document image (PNG, JPG, JPEG, WebP, BMP)
107
  - `enhance_hd`: Enable AI HD enhancement (default: true)
108
  - `scale`: Upscale factor 1-4 (default: 2)
109
- - `async_mode`: Use async mode with progress tracking (default: false)
110
-
111
- #### `POST /docscan/async`
112
- Start async document scanning with progress tracking.
113
-
114
- #### `POST /docscan/base64`
115
- Scan a document and return as base64-encoded string.
116
-
117
- ### Async Job Management
118
- - `GET /progress/{job_id}` - Get job progress and status
119
- - `GET /result/{job_id}` - Get the result of a completed job
120
 
121
  ### Other Endpoints
122
  - `GET /docs` - Interactive Swagger UI documentation
123
  - `GET /redoc` - ReDoc documentation
124
- - `GET /model-info` - Get information about AI models
125
  - `GET /health` - Health check endpoint
126
 
127
- ## Models Used (HuggingFace Inference API)
128
 
129
  | Feature | Model | Description |
130
  |---------|-------|-------------|
131
- | Text-to-Image | Stable Diffusion XL | State-of-the-art text-to-image generation |
132
- | Super Resolution | SD x4 Upscaler | AI-powered 4x image upscaling |
133
- | Background Removal | RMBG-1.4 | High-accuracy background removal |
134
  | Noise Reduction | OpenCV NLM | Non-Local Means Denoising |
135
- | Document Scanning | OpenCV + SD Upscaler | Edge detection, perspective correction, HD enhancement |
136
-
137
- ## Environment Variables
138
-
139
- - `HF_TOKEN`: HuggingFace API token (required for AI features)
140
 
141
  ## Local Development
142
 
143
  ```bash
144
- # Set your HuggingFace token
145
- export HF_TOKEN="your_huggingface_token"
146
-
147
  # Install dependencies
148
  pip install -r requirements.txt
149
 
@@ -157,63 +96,11 @@ The server will start at `http://localhost:7860`
157
 
158
  1. Create a new Space on Hugging Face
159
  2. Select "Docker" as the SDK
160
- 3. Add your `HF_TOKEN` as a secret
161
- 4. Upload all files from this repository
162
- 5. The Space will automatically build and start the container
163
 
164
  ## API Usage Examples
165
 
166
- ### Python - Image Generation
167
- ```python
168
- import requests
169
-
170
- response = requests.post(
171
- "https://your-space.hf.space/generate",
172
- params={
173
- "prompt": "A beautiful sunset over mountains, photorealistic, 8k",
174
- "negative_prompt": "blurry, low quality",
175
- "width": 1024,
176
- "height": 1024
177
- }
178
- )
179
-
180
- with open("generated.png", "wb") as f:
181
- f.write(response.content)
182
- ```
183
-
184
- ### Python - Image Generation (Async)
185
- ```python
186
- import requests
187
- import time
188
-
189
- # Start generation
190
- response = requests.post(
191
- "https://your-space.hf.space/generate/async",
192
- params={
193
- "prompt": "A futuristic city at night, cyberpunk style"
194
- }
195
- )
196
- data = response.json()
197
- job_id = data["job_id"]
198
-
199
- # Poll for progress
200
- while True:
201
- progress = requests.get(f"https://your-space.hf.space/progress/{job_id}").json()
202
- print(f"Progress: {progress['progress']}% - {progress['message']}")
203
-
204
- if progress["status"] == "completed":
205
- break
206
- elif progress["status"] == "failed":
207
- raise Exception(progress.get("error", "Generation failed"))
208
-
209
- time.sleep(2)
210
-
211
- # Get result
212
- result = requests.get(f"https://your-space.hf.space/result/{job_id}")
213
- with open("generated.png", "wb") as f:
214
- f.write(result.content)
215
- ```
216
-
217
  ### Python - Image Enhancement
218
  ```python
219
  import requests
@@ -276,10 +163,6 @@ with open("scanned_document.png", "wb") as f:
276
 
277
  ### cURL Examples
278
  ```bash
279
- # Generate image
280
- curl -X POST "https://your-space.hf.space/generate?prompt=A%20beautiful%20landscape" \
281
- --output generated.png
282
-
283
  # Enhance image
284
  curl -X POST "https://your-space.hf.space/enhance?scale=4" \
285
  -F "file=@image.jpg" -o enhanced.png
 
10
 
11
  # AI Image Processing API
12
 
13
+ A comprehensive image processing API with multiple AI-powered features including super-resolution, background removal, noise reduction, and document scanning.
14
 
15
  ## Features
16
 
17
+ - **Image Enhancement**: Upscale images 2x or 4x using Real-ESRGAN
18
+ - **Background Removal**: Remove backgrounds using BiRefNet AI model via rembg
 
19
  - **Noise Reduction**: Reduce image noise using OpenCV Non-Local Means Denoising
20
  - **Document Scanning**: Auto-crop, align, and enhance document photos with AI
 
21
  - **RESTful API**: Full API with automatic OpenAPI/Swagger documentation
22
  - **Web Interface**: Simple drag-and-drop interface for testing
23
 
24
  ## API Endpoints
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  ### Image Enhancement
27
  #### `POST /enhance`
28
+ Upscale and enhance image quality using Real-ESRGAN.
29
 
30
  **Parameters:**
31
  - `file`: Image file (PNG, JPG, JPEG, WebP, BMP)
32
  - `scale`: Upscale factor (2 or 4, default: 4)
 
 
 
 
 
 
 
33
 
34
  ### Background Removal
35
  #### `POST /remove-background`
36
+ Remove background from an image using BiRefNet AI model.
37
 
38
  **Parameters:**
39
  - `file`: Image file
40
  - `bgcolor`: Background color - 'transparent', 'white', 'black', or hex color like '#FF0000'
 
 
 
 
 
 
 
41
 
42
  ### Noise Reduction
43
  #### `POST /denoise`
 
46
  **Parameters:**
47
  - `file`: Image file
48
  - `strength`: Denoising strength (1-30, default: 10)
 
 
 
 
 
 
 
49
 
50
  ### Document Scanning
51
  #### `POST /docscan`
 
58
  - CLAHE contrast enhancement
59
  - Bilateral noise reduction (preserves edges)
60
  - Unsharp mask sharpening
61
+ - Optional HD upscaling with Real-ESRGAN
62
 
63
  **Parameters:**
64
  - `file`: Document image (PNG, JPG, JPEG, WebP, BMP)
65
  - `enhance_hd`: Enable AI HD enhancement (default: true)
66
  - `scale`: Upscale factor 1-4 (default: 2)
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  ### Other Endpoints
69
  - `GET /docs` - Interactive Swagger UI documentation
70
  - `GET /redoc` - ReDoc documentation
71
+ - `GET /model-info` - Get information about loaded AI models
72
  - `GET /health` - Health check endpoint
73
 
74
+ ## Models Used
75
 
76
  | Feature | Model | Description |
77
  |---------|-------|-------------|
78
+ | Super Resolution | Real-ESRGAN x4plus | State-of-the-art image upscaling |
79
+ | Background Removal | BiRefNet-general | High-accuracy segmentation via rembg |
 
80
  | Noise Reduction | OpenCV NLM | Non-Local Means Denoising |
81
+ | Document Scanning | OpenCV + Real-ESRGAN | Edge detection, perspective correction, HD enhancement |
 
 
 
 
82
 
83
  ## Local Development
84
 
85
  ```bash
 
 
 
86
  # Install dependencies
87
  pip install -r requirements.txt
88
 
 
96
 
97
  1. Create a new Space on Hugging Face
98
  2. Select "Docker" as the SDK
99
+ 3. Upload all files from this repository
100
+ 4. The Space will automatically build and start the container
 
101
 
102
  ## API Usage Examples
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  ### Python - Image Enhancement
105
  ```python
106
  import requests
 
163
 
164
  ### cURL Examples
165
  ```bash
 
 
 
 
166
  # Enhance image
167
  curl -X POST "https://your-space.hf.space/enhance?scale=4" \
168
  -F "file=@image.jpg" -o enhanced.png
app.py CHANGED
@@ -11,7 +11,6 @@ from fastapi.middleware.cors import CORSMiddleware
11
  from PIL import Image
12
  import numpy as np
13
  from progress_tracker import get_tracker, JobStatus
14
- import hf_client
15
 
16
  UPLOAD_DIR = Path("uploads")
17
  OUTPUT_DIR = Path("outputs")
@@ -25,27 +24,25 @@ app = FastAPI(
25
  description="""
26
  ## AI-Powered Image Processing API
27
 
28
- A comprehensive image processing API powered by HuggingFace Inference API.
29
 
30
  ### Features:
31
- - **Image Generation**: Generate images from text prompts using Stable Diffusion XL
32
- - **Image Upscaling**: Enhance image resolution up to 4x using SD x4 Upscaler
33
- - **Background Removal**: Remove backgrounds using RMBG-1.4 model
34
  - **Noise Reduction**: Reduce image noise using advanced denoising algorithms
35
  - **Document Scanning**: Auto-crop, align, and enhance document photos with AI
36
- - **Async Processing**: All endpoints support async mode with progress tracking
37
 
38
  ### Supported Formats:
39
  - PNG, JPG, JPEG, WebP, BMP
40
 
41
- ### Models Used (HuggingFace Inference API):
42
- - **Text-to-Image**: Stable Diffusion XL (stabilityai/stable-diffusion-xl-base-1.0)
43
- - **Super Resolution**: SD x4 Upscaler (stabilityai/stable-diffusion-x4-upscaler)
44
- - **Background Removal**: RMBG-1.4 (briaai/RMBG-1.4)
45
  - **Noise Reduction**: OpenCV Non-Local Means Denoising
46
- - **Document Scanner**: OpenCV edge detection + SD Upscaler
47
  """,
48
- version="3.0.0",
49
  docs_url="/docs",
50
  redoc_url="/redoc",
51
  )
@@ -58,8 +55,26 @@ app.add_middleware(
58
  allow_headers=["*"],
59
  )
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  @app.get("/", response_class=HTMLResponse)
62
  async def home():
 
63
  html_path = Path("templates/index.html")
64
  if html_path.exists():
65
  return html_path.read_text()
@@ -75,17 +90,22 @@ async def home():
75
 
76
  @app.get("/health")
77
  async def health_check():
78
- hf_token_set = bool(os.environ.get("HF_TOKEN"))
79
  return {
80
- "status": "healthy",
81
- "version": "3.0.0",
82
- "hf_token_configured": hf_token_set,
83
- "features": ["generate", "enhance", "remove-background", "denoise", "docscan", "progress-tracking"],
84
- "api_provider": "HuggingFace Inference API"
85
  }
86
 
87
  @app.get("/progress/{job_id}")
88
  async def get_progress(job_id: str):
 
 
 
 
 
 
 
89
  progress = tracker.get_progress(job_id)
90
  if progress is None:
91
  raise HTTPException(status_code=404, detail="Job not found")
@@ -93,6 +113,13 @@ async def get_progress(job_id: str):
93
 
94
  @app.get("/result/{job_id}")
95
  async def get_result(job_id: str):
 
 
 
 
 
 
 
96
  job = tracker.get_job(job_id)
97
  if job is None:
98
  raise HTTPException(status_code=404, detail="Job not found")
@@ -122,187 +149,67 @@ async def get_result(job_id: str):
122
 
123
  @app.get("/model-info")
124
  async def model_info():
125
- return hf_client.get_model_info()
126
-
127
-
128
- def process_generate_job(job_id: str, prompt: str, negative_prompt: str, width: int, height: int, guidance_scale: float, steps: int, output_path: Path):
129
- try:
130
- def progress_callback(progress, message):
131
- tracker.update_progress(job_id, progress, message)
132
-
133
- image = hf_client.generate_image(
134
- prompt=prompt,
135
- negative_prompt=negative_prompt,
136
- width=width,
137
- height=height,
138
- guidance_scale=guidance_scale,
139
- num_inference_steps=steps,
140
- progress_callback=progress_callback
141
- )
142
-
143
- image.save(output_path, "PNG")
144
- tracker.complete_job(job_id, str(output_path), f"Generated {width}x{height} image")
145
-
146
- except Exception as e:
147
- tracker.fail_job(job_id, str(e))
148
-
149
- @app.post("/generate/async")
150
- async def generate_image_async(
151
- prompt: str = Query(..., description="Text prompt describing the image to generate"),
152
- negative_prompt: str = Query(default="", description="What to avoid in the image"),
153
- width: int = Query(default=1024, ge=512, le=1024, description="Image width (512-1024)"),
154
- height: int = Query(default=1024, ge=512, le=1024, description="Image height (512-1024)"),
155
- guidance_scale: float = Query(default=7.5, ge=1.0, le=20.0, description="How closely to follow the prompt (1-20)"),
156
- steps: int = Query(default=50, ge=20, le=100, description="Number of inference steps (20-100)")
157
- ):
158
- """
159
- Start async image generation with progress tracking.
160
-
161
- Uses Stable Diffusion XL via HuggingFace Inference API.
162
-
163
- Returns a job_id for progress tracking via /progress/{job_id}
164
- """
165
- job_id = tracker.create_job("Starting image generation...")
166
- file_id = str(uuid.uuid4())
167
- output_path = OUTPUT_DIR / f"{file_id}_generated.png"
168
-
169
- thread = threading.Thread(
170
- target=process_generate_job,
171
- args=(job_id, prompt, negative_prompt, width, height, guidance_scale, steps, output_path)
172
- )
173
- thread.start()
174
-
175
- return JSONResponse({
176
- "job_id": job_id,
177
- "status": "processing",
178
- "message": "Image generation started. Poll /progress/{job_id} for updates.",
179
- "progress_url": f"/progress/{job_id}",
180
- "result_url": f"/result/{job_id}"
181
- })
182
-
183
- @app.post("/generate")
184
- async def generate_image(
185
- prompt: str = Query(..., description="Text prompt describing the image to generate"),
186
- negative_prompt: str = Query(default="", description="What to avoid in the image"),
187
- width: int = Query(default=1024, ge=512, le=1024, description="Image width (512-1024)"),
188
- height: int = Query(default=1024, ge=512, le=1024, description="Image height (512-1024)"),
189
- guidance_scale: float = Query(default=7.5, ge=1.0, le=20.0, description="How closely to follow the prompt (1-20)"),
190
- steps: int = Query(default=50, ge=20, le=100, description="Number of inference steps (20-100)"),
191
- async_mode: bool = Query(default=False, description="Use async mode with progress tracking")
192
- ):
193
- """
194
- Generate an image from a text prompt using Stable Diffusion XL.
195
-
196
- - **prompt**: Describe what you want to see in the image
197
- - **negative_prompt**: Describe what you want to avoid
198
- - **width/height**: Image dimensions (512-1024, must be multiples of 8)
199
- - **guidance_scale**: Higher values follow the prompt more closely
200
- - **steps**: More steps = higher quality but slower
201
- - **async_mode**: If true, returns job_id for progress tracking
202
-
203
- Returns the generated image as PNG (or job_id if async_mode=true).
204
- """
205
- if async_mode:
206
- job_id = tracker.create_job("Starting image generation...")
207
- file_id = str(uuid.uuid4())
208
- output_path = OUTPUT_DIR / f"{file_id}_generated.png"
209
-
210
- thread = threading.Thread(
211
- target=process_generate_job,
212
- args=(job_id, prompt, negative_prompt, width, height, guidance_scale, steps, output_path)
213
- )
214
- thread.start()
215
-
216
- return JSONResponse({
217
- "job_id": job_id,
218
- "status": "processing",
219
- "message": "Image generation started. Poll /progress/{job_id} for updates.",
220
- "progress_url": f"/progress/{job_id}",
221
- "result_url": f"/result/{job_id}"
222
- })
223
-
224
- try:
225
- image = hf_client.generate_image(
226
- prompt=prompt,
227
- negative_prompt=negative_prompt,
228
- width=width,
229
- height=height,
230
- guidance_scale=guidance_scale,
231
- num_inference_steps=steps
232
- )
233
-
234
- file_id = str(uuid.uuid4())
235
- output_path = OUTPUT_DIR / f"{file_id}_generated.png"
236
- image.save(output_path, "PNG")
237
-
238
- return FileResponse(
239
- output_path,
240
- media_type="image/png",
241
- filename=f"generated_{file_id[:8]}.png"
242
- )
243
-
244
- except Exception as e:
245
- raise HTTPException(status_code=500, detail=f"Error generating image: {str(e)}")
246
-
247
- @app.post("/generate/base64")
248
- async def generate_image_base64(
249
- prompt: str = Query(..., description="Text prompt describing the image to generate"),
250
- negative_prompt: str = Query(default="", description="What to avoid in the image"),
251
- width: int = Query(default=1024, ge=512, le=1024, description="Image width (512-1024)"),
252
- height: int = Query(default=1024, ge=512, le=1024, description="Image height (512-1024)"),
253
- guidance_scale: float = Query(default=7.5, ge=1.0, le=20.0, description="How closely to follow the prompt"),
254
- steps: int = Query(default=50, ge=20, le=100, description="Number of inference steps")
255
- ):
256
- """
257
- Generate an image and return it as base64-encoded string.
258
-
259
- Uses Stable Diffusion XL via HuggingFace Inference API.
260
- """
261
- try:
262
- image = hf_client.generate_image(
263
- prompt=prompt,
264
- negative_prompt=negative_prompt,
265
- width=width,
266
- height=height,
267
- guidance_scale=guidance_scale,
268
- num_inference_steps=steps
269
- )
270
-
271
- buffer = io.BytesIO()
272
- image.save(buffer, format="PNG")
273
- buffer.seek(0)
274
-
275
- img_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
276
-
277
- return JSONResponse({
278
- "success": True,
279
- "image_base64": img_base64,
280
- "size": {"width": image.width, "height": image.height},
281
- "prompt": prompt,
282
- "model": "stabilityai/stable-diffusion-xl-base-1.0"
283
- })
284
-
285
- except Exception as e:
286
- raise HTTPException(status_code=500, detail=f"Error generating image: {str(e)}")
287
-
288
 
289
  def process_enhance_job(job_id: str, image_bytes: bytes, scale: int, output_path: Path, filename: str):
 
290
  try:
291
  input_image = Image.open(io.BytesIO(image_bytes))
292
 
293
  if input_image.mode != "RGB":
294
  input_image = input_image.convert("RGB")
295
 
296
- def progress_callback(progress, message):
297
- tracker.update_progress(job_id, progress, message)
 
 
 
298
 
299
- enhanced_image = hf_client.upscale_image(
300
- image=input_image,
301
- scale=scale,
302
- progress_callback=progress_callback
303
- )
 
 
 
 
 
 
 
 
 
 
 
304
 
305
- enhanced_image.save(output_path, "PNG")
306
  tracker.complete_job(job_id, str(output_path), f"Enhanced to {enhanced_image.width}x{enhanced_image.height}")
307
 
308
  except Exception as e:
@@ -312,14 +219,16 @@ def process_enhance_job(job_id: str, image_bytes: bytes, scale: int, output_path
312
  async def enhance_image_async(
313
  background_tasks: BackgroundTasks,
314
  file: UploadFile = File(..., description="Image file to enhance (PNG, JPG, JPEG, WebP, BMP)"),
315
- scale: int = Query(default=4, ge=2, le=4, description="Upscale factor (2 or 4)")
316
  ):
317
  """
318
  Start async image enhancement with progress tracking.
319
 
320
- Uses SD x4 Upscaler via HuggingFace Inference API.
 
321
 
322
- Returns a job_id for progress tracking via /progress/{job_id}
 
323
  """
324
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
325
  if file.content_type not in allowed_types:
@@ -350,14 +259,14 @@ async def enhance_image_async(
350
  @app.post("/enhance")
351
  async def enhance_image(
352
  file: UploadFile = File(..., description="Image file to enhance (PNG, JPG, JPEG, WebP, BMP)"),
353
- scale: int = Query(default=4, ge=2, le=4, description="Upscale factor (2 or 4)"),
354
  async_mode: bool = Query(default=False, description="Use async mode with progress tracking")
355
  ):
356
  """
357
- Enhance an image using SD x4 Upscaler via HuggingFace Inference API.
358
 
359
  - **file**: Upload an image file (PNG, JPG, JPEG, WebP, BMP)
360
- - **scale**: Upscaling factor - 2x or 4x resolution
361
  - **async_mode**: If true, returns job_id for progress tracking instead of waiting
362
 
363
  Returns the enhanced image as a PNG file (or job_id if async_mode=true).
@@ -396,11 +305,28 @@ async def enhance_image(
396
  if input_image.mode != "RGB":
397
  input_image = input_image.convert("RGB")
398
 
399
- enhanced_image = hf_client.upscale_image(image=input_image, scale=scale)
 
 
 
 
400
 
401
  file_id = str(uuid.uuid4())
402
  output_path = OUTPUT_DIR / f"{file_id}_enhanced.png"
403
- enhanced_image.save(output_path, "PNG")
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
  return FileResponse(
406
  output_path,
@@ -414,13 +340,15 @@ async def enhance_image(
414
  @app.post("/enhance/base64")
415
  async def enhance_image_base64(
416
  file: UploadFile = File(..., description="Image file to enhance"),
417
- scale: int = Query(default=4, ge=2, le=4, description="Upscale factor (2 or 4)")
418
  ):
419
  """
420
  Enhance an image and return it as base64-encoded string.
421
 
422
- Uses SD x4 Upscaler via HuggingFace Inference API.
423
  """
 
 
424
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
425
  if file.content_type not in allowed_types:
426
  raise HTTPException(
@@ -435,7 +363,23 @@ async def enhance_image_base64(
435
  if input_image.mode != "RGB":
436
  input_image = input_image.convert("RGB")
437
 
438
- enhanced_image = hf_client.upscale_image(image=input_image, scale=scale)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439
 
440
  buffer = io.BytesIO()
441
  enhanced_image.save(buffer, format="PNG")
@@ -448,28 +392,19 @@ async def enhance_image_base64(
448
  "image_base64": img_base64,
449
  "original_size": {"width": input_image.width, "height": input_image.height},
450
  "enhanced_size": {"width": enhanced_image.width, "height": enhanced_image.height},
451
- "scale_factor": scale,
452
- "model": "stabilityai/stable-diffusion-x4-upscaler"
453
  })
454
 
455
  except Exception as e:
456
  raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
457
 
458
-
459
  def process_remove_bg_job(job_id: str, image_bytes: bytes, bgcolor: str, output_path: Path):
 
460
  try:
461
- def progress_callback(progress, message):
462
- tracker.update_progress(job_id, progress, message)
463
-
464
- output_image = hf_client.remove_background(
465
- image_bytes=image_bytes,
466
- progress_callback=progress_callback
467
- )
468
 
 
469
  if bgcolor != "transparent":
470
- tracker.update_progress(job_id, 85.0, "Applying background color...")
471
- background = Image.new("RGBA", output_image.size)
472
-
473
  if bgcolor == "white":
474
  bg_color = (255, 255, 255, 255)
475
  elif bgcolor == "black":
@@ -479,16 +414,24 @@ def process_remove_bg_job(job_id: str, image_bytes: bytes, bgcolor: str, output_
479
  if len(hex_color) == 6:
480
  r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
481
  bg_color = (r, g, b, 255)
482
- else:
483
- bg_color = (255, 255, 255, 255)
484
- else:
485
- bg_color = (255, 255, 255, 255)
486
-
487
- background = Image.new("RGBA", output_image.size, bg_color)
488
- background.paste(output_image, mask=output_image.split()[3] if output_image.mode == "RGBA" else None)
489
- output_image = background
490
 
491
- tracker.update_progress(job_id, 95.0, "Saving result...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  output_image.save(output_path, "PNG")
493
  tracker.complete_job(job_id, str(output_path), "Background removed successfully")
494
 
@@ -503,8 +446,6 @@ async def remove_background_async(
503
  """
504
  Start async background removal with progress tracking.
505
 
506
- Uses RMBG-1.4 via HuggingFace Inference API.
507
-
508
  Returns a job_id for progress tracking via /progress/{job_id}
509
  """
510
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
@@ -537,7 +478,7 @@ async def remove_background(
537
  async_mode: bool = Query(default=False, description="Use async mode with progress tracking")
538
  ):
539
  """
540
- Remove background from an image using RMBG-1.4 via HuggingFace Inference API.
541
 
542
  - **file**: Upload an image file (PNG, JPG, JPEG, WebP, BMP)
543
  - **bgcolor**: Background color after removal. Options:
@@ -578,8 +519,7 @@ async def remove_background(
578
  })
579
 
580
  try:
581
- output_image = hf_client.remove_background(image_bytes=contents)
582
-
583
  if bgcolor != "transparent":
584
  if bgcolor == "white":
585
  bg_color = (255, 255, 255, 255)
@@ -590,14 +530,17 @@ async def remove_background(
590
  if len(hex_color) == 6:
591
  r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
592
  bg_color = (r, g, b, 255)
593
- else:
594
- bg_color = (255, 255, 255, 255)
595
- else:
596
- bg_color = (255, 255, 255, 255)
597
-
598
- background = Image.new("RGBA", output_image.size, bg_color)
599
- background.paste(output_image, mask=output_image.split()[3] if output_image.mode == "RGBA" else None)
600
- output_image = background
 
 
 
601
 
602
  file_id = str(uuid.uuid4())
603
  output_path = OUTPUT_DIR / f"{file_id}_nobg.png"
@@ -619,9 +562,9 @@ async def remove_background_base64(
619
  ):
620
  """
621
  Remove background from an image and return as base64.
622
-
623
- Uses RMBG-1.4 via HuggingFace Inference API.
624
  """
 
 
625
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
626
  if file.content_type not in allowed_types:
627
  raise HTTPException(
@@ -633,8 +576,7 @@ async def remove_background_base64(
633
  contents = await file.read()
634
  input_image = Image.open(io.BytesIO(contents))
635
 
636
- output_image = hf_client.remove_background(image_bytes=contents)
637
-
638
  if bgcolor != "transparent":
639
  if bgcolor == "white":
640
  bg_color = (255, 255, 255, 255)
@@ -645,14 +587,16 @@ async def remove_background_base64(
645
  if len(hex_color) == 6:
646
  r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
647
  bg_color = (r, g, b, 255)
648
- else:
649
- bg_color = (255, 255, 255, 255)
650
- else:
651
- bg_color = (255, 255, 255, 255)
652
-
653
- background = Image.new("RGBA", output_image.size, bg_color)
654
- background.paste(output_image, mask=output_image.split()[3] if output_image.mode == "RGBA" else None)
655
- output_image = background
 
 
656
 
657
  buffer = io.BytesIO()
658
  output_image.save(buffer, format="PNG")
@@ -665,27 +609,48 @@ async def remove_background_base64(
665
  "image_base64": img_base64,
666
  "original_size": {"width": input_image.width, "height": input_image.height},
667
  "output_size": {"width": output_image.width, "height": output_image.height},
668
- "background": bgcolor,
669
- "model": "briaai/RMBG-1.4"
670
  })
671
 
672
  except Exception as e:
673
  raise HTTPException(status_code=500, detail=f"Error removing background: {str(e)}")
674
 
675
-
676
  def process_denoise_job(job_id: str, image_bytes: bytes, strength: int, output_path: Path):
 
677
  try:
 
678
  input_image = Image.open(io.BytesIO(image_bytes))
679
 
680
- def progress_callback(progress, message):
681
- tracker.update_progress(job_id, progress, message)
682
 
683
- output_image = hf_client.denoise_image(
684
- image=input_image,
685
- strength=strength,
686
- progress_callback=progress_callback
687
- )
688
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
689
  output_image.save(output_path, "PNG")
690
  tracker.complete_job(job_id, str(output_path), "Denoising complete")
691
 
@@ -773,7 +738,29 @@ async def denoise_image(
773
 
774
  try:
775
  input_image = Image.open(io.BytesIO(contents))
776
- output_image = hf_client.denoise_image(image=input_image, strength=strength)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
777
 
778
  file_id = str(uuid.uuid4())
779
  output_path = OUTPUT_DIR / f"{file_id}_denoised.png"
@@ -796,6 +783,8 @@ async def denoise_image_base64(
796
  """
797
  Reduce noise in an image and return as base64.
798
  """
 
 
799
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
800
  if file.content_type not in allowed_types:
801
  raise HTTPException(
@@ -806,7 +795,29 @@ async def denoise_image_base64(
806
  try:
807
  contents = await file.read()
808
  input_image = Image.open(io.BytesIO(contents))
809
- output_image = hf_client.denoise_image(image=input_image, strength=strength)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
810
 
811
  buffer = io.BytesIO()
812
  output_image.save(buffer, format="PNG")
@@ -835,6 +846,7 @@ def get_doc_scanner():
835
  return doc_scanner
836
 
837
  def process_docscan_job(job_id: str, image_bytes: bytes, enhance_hd: bool, scale: int, output_path: Path):
 
838
  try:
839
  tracker.update_progress(job_id, 5.0, "Loading document image...")
840
  input_image = Image.open(io.BytesIO(image_bytes))
@@ -855,7 +867,7 @@ def process_docscan_job(job_id: str, image_bytes: bytes, enhance_hd: bool, scale
855
  scanner = get_doc_scanner()
856
 
857
  if enhance_hd:
858
- tracker.update_progress(job_id, 60.0, "Applying HD enhancement (HuggingFace AI)...")
859
  else:
860
  tracker.update_progress(job_id, 60.0, "Finalizing document...")
861
 
@@ -905,7 +917,7 @@ async def scan_document_async(
905
  @app.post("/docscan")
906
  async def scan_document(
907
  file: UploadFile = File(..., description="Document image to scan (PNG, JPG, JPEG, WebP, BMP)"),
908
- enhance_hd: bool = Query(default=True, description="Apply HD enhancement using AI (HuggingFace)"),
909
  scale: int = Query(default=2, ge=1, le=4, description="Upscale factor for HD enhancement (1-4)"),
910
  async_mode: bool = Query(default=False, description="Use async mode with progress tracking")
911
  ):
@@ -920,7 +932,7 @@ async def scan_document(
920
  - **Contrast enhancement**: Applies CLAHE for improved readability
921
  - **Noise reduction**: Uses bilateral filtering to reduce noise while preserving edges
922
  - **Sharpening**: Applies unsharp masking for crisp text without artifacts
923
- - **HD upscaling**: Uses HuggingFace SD Upscaler for high-definition output
924
 
925
  Parameters:
926
  - **file**: Upload a photo of a document (supports various angles and lighting)
@@ -997,7 +1009,10 @@ async def scan_document_base64(
997
  Scan and enhance a document image, returning the result as base64.
998
 
999
  Same processing as /docscan but returns base64-encoded image data.
 
1000
  """
 
 
1001
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
1002
  if file.content_type not in allowed_types:
1003
  raise HTTPException(
@@ -1042,7 +1057,7 @@ async def scan_document_base64(
1042
  "contrast_enhancement": "CLAHE",
1043
  "noise_reduction": "bilateral_filter",
1044
  "sharpening": "unsharp_mask",
1045
- "hd_upscaling": "HuggingFace SD Upscaler" if enhance_hd else "disabled"
1046
  }
1047
  })
1048
 
 
11
  from PIL import Image
12
  import numpy as np
13
  from progress_tracker import get_tracker, JobStatus
 
14
 
15
  UPLOAD_DIR = Path("uploads")
16
  OUTPUT_DIR = Path("outputs")
 
24
  description="""
25
  ## AI-Powered Image Processing API
26
 
27
+ A comprehensive image processing API with multiple AI-powered features.
28
 
29
  ### Features:
30
+ - **Image Upscaling**: Enhance image resolution up to 4x using Real-ESRGAN
31
+ - **Background Removal**: Remove backgrounds using rembg with BiRefNet model
 
32
  - **Noise Reduction**: Reduce image noise using advanced denoising algorithms
33
  - **Document Scanning**: Auto-crop, align, and enhance document photos with AI
34
+ - **Quality Enhancement**: Improve image clarity and reduce artifacts
35
 
36
  ### Supported Formats:
37
  - PNG, JPG, JPEG, WebP, BMP
38
 
39
+ ### Models Used:
40
+ - **Super Resolution**: Real-ESRGAN x4plus
41
+ - **Background Removal**: rembg with BiRefNet-massive model
 
42
  - **Noise Reduction**: OpenCV Non-Local Means Denoising
43
+ - **Document Scanner**: OpenCV edge detection + Real-ESRGAN upscaling
44
  """,
45
+ version="2.1.0",
46
  docs_url="/docs",
47
  redoc_url="/redoc",
48
  )
 
55
  allow_headers=["*"],
56
  )
57
 
58
+ enhancer = None
59
+ bg_remover_session = None
60
+
61
def get_enhancer():
    # Lazily build the shared ImageEnhancer and cache it in the module-level
    # `enhancer` global, so the model-backed object is created at most once
    # per process and only when an endpoint actually needs it.
    global enhancer
    if enhancer is not None:
        return enhancer

    # Imported here rather than at module import time, so callers can catch
    # ImportError and fall back when the enhancer module is unavailable.
    from enhancer import ImageEnhancer as _enhancer_cls

    enhancer = _enhancer_cls()
    return enhancer
67
+
68
def get_bg_remover():
    # Lazily create the rembg session and cache it in the module-level
    # `bg_remover_session` global; later calls reuse the same session.
    global bg_remover_session
    if bg_remover_session is not None:
        return bg_remover_session

    # Imported on first use so a missing rembg install surfaces as an
    # ImportError at the call site instead of at module import time.
    from rembg import new_session as _open_session

    bg_remover_session = _open_session("birefnet-general")
    return bg_remover_session
74
+
75
  @app.get("/", response_class=HTMLResponse)
76
  async def home():
77
+ """Serve the main HTML page for testing image processing."""
78
  html_path = Path("templates/index.html")
79
  if html_path.exists():
80
  return html_path.read_text()
 
90
 
@app.get("/health")
async def health_check():
    """Health check endpoint."""
    # Returns a JSON object with:
    #   status   - always "healthy" when the process can answer requests
    #   version  - the API version declared on the application object
    #   features - names of the processing features this service exposes
    return {
        "status": "healthy",
        # Read the version from the app instead of keeping a second
        # hard-coded copy: the literal here had fallen behind the version
        # passed to the application constructor.
        "version": app.version,
        "features": ["enhance", "remove-background", "denoise", "docscan", "progress-tracking"],
    }
99
 
100
  @app.get("/progress/{job_id}")
101
  async def get_progress(job_id: str):
102
+ """
103
+ Get the progress of an async image processing job.
104
+
105
+ - **job_id**: The job ID returned when starting an async processing request
106
+
107
+ Returns the current progress, status, and message for the job.
108
+ """
109
  progress = tracker.get_progress(job_id)
110
  if progress is None:
111
  raise HTTPException(status_code=404, detail="Job not found")
 
113
 
114
  @app.get("/result/{job_id}")
115
  async def get_result(job_id: str):
116
+ """
117
+ Get the result of a completed async job.
118
+
119
+ - **job_id**: The job ID returned when starting an async processing request
120
+
121
+ Returns the processed image as a file download if the job is complete.
122
+ """
123
  job = tracker.get_job(job_id)
124
  if job is None:
125
  raise HTTPException(status_code=404, detail="Job not found")
 
149
 
@app.get("/model-info")
async def model_info():
    """Get information about the loaded AI models."""
    # Each feature's metadata is built as its own named dict and then
    # assembled, keeping the same keys, values and key order in the response.
    super_resolution = {
        "name": "Real-ESRGAN x4plus",
        "description": "State-of-the-art image super-resolution",
        "upscale_factors": [2, 4],
        "source": "https://github.com/xinntao/Real-ESRGAN",
    }
    background_removal = {
        "name": "BiRefNet-general",
        "description": "High-accuracy background removal using bilateral reference network",
        "source": "https://github.com/danielgatis/rembg",
    }
    noise_reduction = {
        "name": "Non-Local Means Denoising",
        "description": "Advanced noise reduction algorithm",
        "source": "OpenCV",
    }
    document_scanner = {
        "name": "AI Document Scanner",
        "description": "Auto-crop, perspective correction, alignment, and HD enhancement",
        "features": [
            "edge detection",
            "perspective transform",
            "CLAHE contrast",
            "bilateral denoising",
            "unsharp masking",
            "Real-ESRGAN upscaling",
        ],
        "source": "OpenCV + Real-ESRGAN",
    }

    payload = {
        "models": {
            "super_resolution": super_resolution,
            "background_removal": background_removal,
            "noise_reduction": noise_reduction,
            "document_scanner": document_scanner,
        },
        "supported_formats": ["png", "jpg", "jpeg", "webp", "bmp"],
        "max_input_size": "512x512 for fast processing (images auto-resized)",
    }
    return payload
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
  def process_enhance_job(job_id: str, image_bytes: bytes, scale: int, output_path: Path, filename: str):
183
+ """Background task to process image enhancement with progress tracking."""
184
  try:
185
  input_image = Image.open(io.BytesIO(image_bytes))
186
 
187
  if input_image.mode != "RGB":
188
  input_image = input_image.convert("RGB")
189
 
190
+ max_size = 512
191
+ if input_image.width > max_size or input_image.height > max_size:
192
+ ratio = min(max_size / input_image.width, max_size / input_image.height)
193
+ new_size = (int(input_image.width * ratio), int(input_image.height * ratio))
194
+ input_image = input_image.resize(new_size, Image.LANCZOS)
195
 
196
+ tracker.update_progress(job_id, 5.0, "Image loaded and preprocessed")
197
+
198
+ def progress_callback(progress, message, current_step, total_steps):
199
+ tracker.update_progress(job_id, progress, message, current_step, total_steps)
200
+
201
+ try:
202
+ enhancer_instance = get_enhancer()
203
+ enhanced_image = enhancer_instance.enhance(input_image, scale, progress_callback)
204
+ enhanced_image.save(output_path, "PNG")
205
+ except ImportError:
206
+ tracker.update_progress(job_id, 50.0, "Using fallback enhancer...")
207
+ enhanced_image = input_image.resize(
208
+ (input_image.width * scale, input_image.height * scale),
209
+ Image.LANCZOS
210
+ )
211
+ enhanced_image.save(output_path, "PNG")
212
 
 
213
  tracker.complete_job(job_id, str(output_path), f"Enhanced to {enhanced_image.width}x{enhanced_image.height}")
214
 
215
  except Exception as e:
 
219
  async def enhance_image_async(
220
  background_tasks: BackgroundTasks,
221
  file: UploadFile = File(..., description="Image file to enhance (PNG, JPG, JPEG, WebP, BMP)"),
222
+ scale: int = Query(default=2, ge=2, le=4, description="Upscale factor (2 or 4)")
223
  ):
224
  """
225
  Start async image enhancement with progress tracking.
226
 
227
+ - **file**: Upload an image file (PNG, JPG, JPEG, WebP, BMP)
228
+ - **scale**: Upscaling factor - 2 for 2x resolution, 4 for 4x resolution
229
 
230
+ Returns a job_id that can be used to track progress via /progress/{job_id}
231
+ and retrieve the result via /result/{job_id}
232
  """
233
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
234
  if file.content_type not in allowed_types:
 
259
  @app.post("/enhance")
260
  async def enhance_image(
261
  file: UploadFile = File(..., description="Image file to enhance (PNG, JPG, JPEG, WebP, BMP)"),
262
+ scale: int = Query(default=2, ge=2, le=4, description="Upscale factor (2 or 4)"),
263
  async_mode: bool = Query(default=False, description="Use async mode with progress tracking")
264
  ):
265
  """
266
+ Enhance an image using Real-ESRGAN AI model.
267
 
268
  - **file**: Upload an image file (PNG, JPG, JPEG, WebP, BMP)
269
+ - **scale**: Upscaling factor - 2 for 2x resolution, 4 for 4x resolution
270
  - **async_mode**: If true, returns job_id for progress tracking instead of waiting
271
 
272
  Returns the enhanced image as a PNG file (or job_id if async_mode=true).
 
305
  if input_image.mode != "RGB":
306
  input_image = input_image.convert("RGB")
307
 
308
+ max_size = 512
309
+ if input_image.width > max_size or input_image.height > max_size:
310
+ ratio = min(max_size / input_image.width, max_size / input_image.height)
311
+ new_size = (int(input_image.width * ratio), int(input_image.height * ratio))
312
+ input_image = input_image.resize(new_size, Image.LANCZOS)
313
 
314
  file_id = str(uuid.uuid4())
315
  output_path = OUTPUT_DIR / f"{file_id}_enhanced.png"
316
+
317
+ try:
318
+ import concurrent.futures
319
+ enhancer_instance = get_enhancer()
320
+ with concurrent.futures.ThreadPoolExecutor() as executor:
321
+ future = executor.submit(enhancer_instance.enhance, input_image, scale)
322
+ enhanced_image = future.result(timeout=300)
323
+ enhanced_image.save(output_path, "PNG")
324
+ except ImportError:
325
+ enhanced_image = input_image.resize(
326
+ (input_image.width * scale, input_image.height * scale),
327
+ Image.LANCZOS
328
+ )
329
+ enhanced_image.save(output_path, "PNG")
330
 
331
  return FileResponse(
332
  output_path,
 
340
  @app.post("/enhance/base64")
341
  async def enhance_image_base64(
342
  file: UploadFile = File(..., description="Image file to enhance"),
343
+ scale: int = Query(default=2, ge=2, le=4, description="Upscale factor (2 or 4)")
344
  ):
345
  """
346
  Enhance an image and return it as base64-encoded string.
347
 
348
+ Useful for integrations that prefer base64 over file downloads.
349
  """
350
+ import base64
351
+
352
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
353
  if file.content_type not in allowed_types:
354
  raise HTTPException(
 
363
  if input_image.mode != "RGB":
364
  input_image = input_image.convert("RGB")
365
 
366
+ max_size = 512
367
+ if input_image.width > max_size or input_image.height > max_size:
368
+ ratio = min(max_size / input_image.width, max_size / input_image.height)
369
+ new_size = (int(input_image.width * ratio), int(input_image.height * ratio))
370
+ input_image = input_image.resize(new_size, Image.LANCZOS)
371
+
372
+ try:
373
+ import concurrent.futures
374
+ enhancer_instance = get_enhancer()
375
+ with concurrent.futures.ThreadPoolExecutor() as executor:
376
+ future = executor.submit(enhancer_instance.enhance, input_image, scale)
377
+ enhanced_image = future.result(timeout=300)
378
+ except ImportError:
379
+ enhanced_image = input_image.resize(
380
+ (input_image.width * scale, input_image.height * scale),
381
+ Image.LANCZOS
382
+ )
383
 
384
  buffer = io.BytesIO()
385
  enhanced_image.save(buffer, format="PNG")
 
392
  "image_base64": img_base64,
393
  "original_size": {"width": input_image.width, "height": input_image.height},
394
  "enhanced_size": {"width": enhanced_image.width, "height": enhanced_image.height},
395
+ "scale_factor": scale
 
396
  })
397
 
398
  except Exception as e:
399
  raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
400
 
 
401
  def process_remove_bg_job(job_id: str, image_bytes: bytes, bgcolor: str, output_path: Path):
402
+ """Background task for removing background with progress tracking."""
403
  try:
404
+ tracker.update_progress(job_id, 10.0, "Loading image...")
 
 
 
 
 
 
405
 
406
+ bg_color = None
407
  if bgcolor != "transparent":
 
 
 
408
  if bgcolor == "white":
409
  bg_color = (255, 255, 255, 255)
410
  elif bgcolor == "black":
 
414
  if len(hex_color) == 6:
415
  r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
416
  bg_color = (r, g, b, 255)
 
 
 
 
 
 
 
 
417
 
418
+ tracker.update_progress(job_id, 20.0, "Initializing AI model...")
419
+
420
+ try:
421
+ from rembg import remove
422
+ tracker.update_progress(job_id, 40.0, "Removing background...")
423
+ session = get_bg_remover()
424
+ tracker.update_progress(job_id, 60.0, "Processing...")
425
+ output_data = remove(image_bytes, session=session, bgcolor=bg_color)
426
+ output_image = Image.open(io.BytesIO(output_data))
427
+ except ImportError:
428
+ tracker.update_progress(job_id, 50.0, "Using fallback (no rembg)...")
429
+ input_image = Image.open(io.BytesIO(image_bytes))
430
+ if input_image.mode != "RGBA":
431
+ input_image = input_image.convert("RGBA")
432
+ output_image = input_image
433
+
434
+ tracker.update_progress(job_id, 90.0, "Saving result...")
435
  output_image.save(output_path, "PNG")
436
  tracker.complete_job(job_id, str(output_path), "Background removed successfully")
437
 
 
446
  """
447
  Start async background removal with progress tracking.
448
 
 
 
449
  Returns a job_id for progress tracking via /progress/{job_id}
450
  """
451
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
 
478
  async_mode: bool = Query(default=False, description="Use async mode with progress tracking")
479
  ):
480
  """
481
+ Remove background from an image using AI.
482
 
483
  - **file**: Upload an image file (PNG, JPG, JPEG, WebP, BMP)
484
  - **bgcolor**: Background color after removal. Options:
 
519
  })
520
 
521
  try:
522
+ bg_color = None
 
523
  if bgcolor != "transparent":
524
  if bgcolor == "white":
525
  bg_color = (255, 255, 255, 255)
 
530
  if len(hex_color) == 6:
531
  r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
532
  bg_color = (r, g, b, 255)
533
+
534
+ try:
535
+ from rembg import remove
536
+ session = get_bg_remover()
537
+ output_data = remove(contents, session=session, bgcolor=bg_color)
538
+ output_image = Image.open(io.BytesIO(output_data))
539
+ except ImportError:
540
+ input_image = Image.open(io.BytesIO(contents))
541
+ if input_image.mode != "RGBA":
542
+ input_image = input_image.convert("RGBA")
543
+ output_image = input_image
544
 
545
  file_id = str(uuid.uuid4())
546
  output_path = OUTPUT_DIR / f"{file_id}_nobg.png"
 
562
  ):
563
  """
564
  Remove background from an image and return as base64.
 
 
565
  """
566
+ import base64
567
+
568
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
569
  if file.content_type not in allowed_types:
570
  raise HTTPException(
 
576
  contents = await file.read()
577
  input_image = Image.open(io.BytesIO(contents))
578
 
579
+ bg_color = None
 
580
  if bgcolor != "transparent":
581
  if bgcolor == "white":
582
  bg_color = (255, 255, 255, 255)
 
587
  if len(hex_color) == 6:
588
  r, g, b = int(hex_color[0:2], 16), int(hex_color[2:4], 16), int(hex_color[4:6], 16)
589
  bg_color = (r, g, b, 255)
590
+
591
+ try:
592
+ from rembg import remove
593
+ session = get_bg_remover()
594
+ output_data = remove(contents, session=session, bgcolor=bg_color)
595
+ output_image = Image.open(io.BytesIO(output_data))
596
+ except ImportError:
597
+ if input_image.mode != "RGBA":
598
+ input_image = input_image.convert("RGBA")
599
+ output_image = input_image
600
 
601
  buffer = io.BytesIO()
602
  output_image.save(buffer, format="PNG")
 
609
  "image_base64": img_base64,
610
  "original_size": {"width": input_image.width, "height": input_image.height},
611
  "output_size": {"width": output_image.width, "height": output_image.height},
612
+ "background": bgcolor
 
613
  })
614
 
615
  except Exception as e:
616
  raise HTTPException(status_code=500, detail=f"Error removing background: {str(e)}")
617
 
 
618
  def process_denoise_job(job_id: str, image_bytes: bytes, strength: int, output_path: Path):
619
+ """Background task for denoising with progress tracking."""
620
  try:
621
+ tracker.update_progress(job_id, 10.0, "Loading image...")
622
  input_image = Image.open(io.BytesIO(image_bytes))
623
 
624
+ if input_image.mode != "RGB":
625
+ input_image = input_image.convert("RGB")
626
 
627
+ tracker.update_progress(job_id, 20.0, "Applying denoising filter...")
 
 
 
 
628
 
629
+ try:
630
+ import cv2
631
+ tracker.update_progress(job_id, 30.0, "Using OpenCV Non-Local Means...")
632
+ img_array = np.array(input_image)
633
+ img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
634
+
635
+ tracker.update_progress(job_id, 50.0, "Processing...")
636
+ denoised_bgr = cv2.fastNlMeansDenoisingColored(
637
+ img_bgr,
638
+ None,
639
+ h=strength,
640
+ hForColorComponents=strength,
641
+ templateWindowSize=7,
642
+ searchWindowSize=21
643
+ )
644
+
645
+ tracker.update_progress(job_id, 80.0, "Converting result...")
646
+ denoised_rgb = cv2.cvtColor(denoised_bgr, cv2.COLOR_BGR2RGB)
647
+ output_image = Image.fromarray(denoised_rgb)
648
+ except ImportError:
649
+ tracker.update_progress(job_id, 50.0, "Using PIL fallback...")
650
+ from PIL import ImageFilter
651
+ output_image = input_image.filter(ImageFilter.SMOOTH_MORE)
652
+
653
+ tracker.update_progress(job_id, 90.0, "Saving result...")
654
  output_image.save(output_path, "PNG")
655
  tracker.complete_job(job_id, str(output_path), "Denoising complete")
656
 
 
738
 
739
  try:
740
  input_image = Image.open(io.BytesIO(contents))
741
+
742
+ if input_image.mode != "RGB":
743
+ input_image = input_image.convert("RGB")
744
+
745
+ try:
746
+ import cv2
747
+ img_array = np.array(input_image)
748
+ img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
749
+
750
+ denoised_bgr = cv2.fastNlMeansDenoisingColored(
751
+ img_bgr,
752
+ None,
753
+ h=strength,
754
+ hForColorComponents=strength,
755
+ templateWindowSize=7,
756
+ searchWindowSize=21
757
+ )
758
+
759
+ denoised_rgb = cv2.cvtColor(denoised_bgr, cv2.COLOR_BGR2RGB)
760
+ output_image = Image.fromarray(denoised_rgb)
761
+ except ImportError:
762
+ from PIL import ImageFilter
763
+ output_image = input_image.filter(ImageFilter.SMOOTH_MORE)
764
 
765
  file_id = str(uuid.uuid4())
766
  output_path = OUTPUT_DIR / f"{file_id}_denoised.png"
 
783
  """
784
  Reduce noise in an image and return as base64.
785
  """
786
+ import base64
787
+
788
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
789
  if file.content_type not in allowed_types:
790
  raise HTTPException(
 
795
  try:
796
  contents = await file.read()
797
  input_image = Image.open(io.BytesIO(contents))
798
+
799
+ if input_image.mode != "RGB":
800
+ input_image = input_image.convert("RGB")
801
+
802
+ try:
803
+ import cv2
804
+ img_array = np.array(input_image)
805
+ img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
806
+
807
+ denoised_bgr = cv2.fastNlMeansDenoisingColored(
808
+ img_bgr,
809
+ None,
810
+ h=strength,
811
+ hForColorComponents=strength,
812
+ templateWindowSize=7,
813
+ searchWindowSize=21
814
+ )
815
+
816
+ denoised_rgb = cv2.cvtColor(denoised_bgr, cv2.COLOR_BGR2RGB)
817
+ output_image = Image.fromarray(denoised_rgb)
818
+ except ImportError:
819
+ from PIL import ImageFilter
820
+ output_image = input_image.filter(ImageFilter.SMOOTH_MORE)
821
 
822
  buffer = io.BytesIO()
823
  output_image.save(buffer, format="PNG")
 
846
  return doc_scanner
847
 
848
  def process_docscan_job(job_id: str, image_bytes: bytes, enhance_hd: bool, scale: int, output_path: Path):
849
+ """Background task for document scanning with progress tracking."""
850
  try:
851
  tracker.update_progress(job_id, 5.0, "Loading document image...")
852
  input_image = Image.open(io.BytesIO(image_bytes))
 
867
  scanner = get_doc_scanner()
868
 
869
  if enhance_hd:
870
+ tracker.update_progress(job_id, 60.0, "Applying HD enhancement (AI upscaling)...")
871
  else:
872
  tracker.update_progress(job_id, 60.0, "Finalizing document...")
873
 
 
917
  @app.post("/docscan")
918
  async def scan_document(
919
  file: UploadFile = File(..., description="Document image to scan (PNG, JPG, JPEG, WebP, BMP)"),
920
+ enhance_hd: bool = Query(default=True, description="Apply HD enhancement using AI (Real-ESRGAN)"),
921
  scale: int = Query(default=2, ge=1, le=4, description="Upscale factor for HD enhancement (1-4)"),
922
  async_mode: bool = Query(default=False, description="Use async mode with progress tracking")
923
  ):
 
932
  - **Contrast enhancement**: Applies CLAHE for improved readability
933
  - **Noise reduction**: Uses bilateral filtering to reduce noise while preserving edges
934
  - **Sharpening**: Applies unsharp masking for crisp text without artifacts
935
+ - **HD upscaling**: Optionally uses Real-ESRGAN for high-definition output
936
 
937
  Parameters:
938
  - **file**: Upload a photo of a document (supports various angles and lighting)
 
1009
  Scan and enhance a document image, returning the result as base64.
1010
 
1011
  Same processing as /docscan but returns base64-encoded image data.
1012
+ Useful for integrations that prefer base64 over file downloads.
1013
  """
1014
+ import base64
1015
+
1016
  allowed_types = ["image/png", "image/jpeg", "image/jpg", "image/webp", "image/bmp"]
1017
  if file.content_type not in allowed_types:
1018
  raise HTTPException(
 
1057
  "contrast_enhancement": "CLAHE",
1058
  "noise_reduction": "bilateral_filter",
1059
  "sharpening": "unsharp_mask",
1060
+ "hd_upscaling": "Real-ESRGAN" if enhance_hd else "disabled"
1061
  }
1062
  })
1063
 
document_scanner.py CHANGED
@@ -153,11 +153,12 @@ class DocumentScanner:
153
 
154
  if enhance_hd:
155
  try:
156
- import hf_client
157
- hd_image = hf_client.upscale_image(brightened, scale=scale)
 
158
  return hd_image
159
  except Exception as e:
160
- print(f"[DocScan] Using fallback upscaling: {e}")
161
  new_size = (brightened.width * scale, brightened.height * scale)
162
  hd_image = brightened.resize(new_size, Image.LANCZOS)
163
  return self.enhance_sharpness(hd_image, amount=0.5)
 
153
 
154
  if enhance_hd:
155
  try:
156
+ from enhancer import ImageEnhancer
157
+ ai_enhancer = ImageEnhancer()
158
+ hd_image = ai_enhancer.enhance(brightened, scale=scale)
159
  return hd_image
160
  except Exception as e:
161
+ print(f"[DocScan] Using fallback upscaling (AI models load on Hugging Face deployment)")
162
  new_size = (brightened.width * scale, brightened.height * scale)
163
  hd_image = brightened.resize(new_size, Image.LANCZOS)
164
  return self.enhance_sharpness(hd_image, amount=0.5)
hf_client.py DELETED
@@ -1,391 +0,0 @@
1
- import os
2
- import io
3
- import time
4
- import base64
5
- import requests
6
- from PIL import Image
7
- from typing import Optional, Tuple, Callable
8
-
9
- HF_TOKEN = os.environ.get("HF_TOKEN")
10
- HF_API_BASE = "https://api-inference.huggingface.co/models"
11
-
12
- MODELS = {
13
- "text_to_image": "stabilityai/stable-diffusion-xl-base-1.0",
14
- "upscaler": "stabilityai/stable-diffusion-x4-upscaler",
15
- "background_removal": "briaai/RMBG-1.4",
16
- }
17
-
18
- def get_headers():
19
- if not HF_TOKEN:
20
- raise ValueError("HF_TOKEN environment variable is not set")
21
- return {"Authorization": f"Bearer {HF_TOKEN}"}
22
-
23
-
24
- def wait_for_model(model_id: str, max_retries: int = 10, retry_delay: float = 5.0) -> bool:
25
- url = f"{HF_API_BASE}/{model_id}"
26
- headers = get_headers()
27
-
28
- for attempt in range(max_retries):
29
- try:
30
- response = requests.post(
31
- url,
32
- headers=headers,
33
- json={"inputs": "test", "options": {"wait_for_model": True}},
34
- timeout=30
35
- )
36
- if response.status_code == 200:
37
- return True
38
- elif response.status_code == 503:
39
- time.sleep(retry_delay)
40
- continue
41
- else:
42
- return True
43
- except requests.exceptions.Timeout:
44
- time.sleep(retry_delay)
45
- continue
46
- return False
47
-
48
-
49
- def generate_image(
50
- prompt: str,
51
- negative_prompt: str = "",
52
- width: int = 1024,
53
- height: int = 1024,
54
- guidance_scale: float = 7.5,
55
- num_inference_steps: int = 50,
56
- progress_callback: Optional[Callable] = None
57
- ) -> Image.Image:
58
- model_id = MODELS["text_to_image"]
59
- url = f"{HF_API_BASE}/{model_id}"
60
- headers = get_headers()
61
-
62
- if progress_callback:
63
- progress_callback(10.0, "Connecting to AI model...")
64
-
65
- payload = {
66
- "inputs": prompt,
67
- "parameters": {
68
- "negative_prompt": negative_prompt,
69
- "width": width,
70
- "height": height,
71
- "guidance_scale": guidance_scale,
72
- "num_inference_steps": num_inference_steps,
73
- },
74
- "options": {
75
- "wait_for_model": True,
76
- "use_cache": False
77
- }
78
- }
79
-
80
- if progress_callback:
81
- progress_callback(20.0, "Sending request to Stable Diffusion XL...")
82
-
83
- max_retries = 3
84
- retry_delay = 10.0
85
-
86
- for attempt in range(max_retries):
87
- try:
88
- response = requests.post(url, headers=headers, json=payload, timeout=300)
89
-
90
- if response.status_code == 200:
91
- if progress_callback:
92
- progress_callback(90.0, "Processing response...")
93
- image = Image.open(io.BytesIO(response.content))
94
- if progress_callback:
95
- progress_callback(100.0, "Image generated successfully!")
96
- return image
97
- elif response.status_code == 503:
98
- if progress_callback:
99
- progress_callback(30.0 + attempt * 10, f"Model loading... (attempt {attempt + 1}/{max_retries})")
100
- time.sleep(retry_delay)
101
- continue
102
- else:
103
- error_msg = response.json().get("error", response.text)
104
- raise Exception(f"API error: {error_msg}")
105
- except requests.exceptions.Timeout:
106
- if attempt < max_retries - 1:
107
- time.sleep(retry_delay)
108
- continue
109
- raise Exception("Request timed out")
110
-
111
- raise Exception("Failed to generate image after multiple retries")
112
-
113
-
114
- def upscale_image(
115
- image: Image.Image,
116
- prompt: str = "high quality, detailed, sharp",
117
- scale: int = 4,
118
- progress_callback: Optional[Callable] = None
119
- ) -> Image.Image:
120
- model_id = MODELS["upscaler"]
121
- url = f"{HF_API_BASE}/{model_id}"
122
- headers = get_headers()
123
- headers["Content-Type"] = "application/json"
124
-
125
- if progress_callback:
126
- progress_callback(10.0, "Preparing image for upscaling...")
127
-
128
- if image.mode != "RGB":
129
- image = image.convert("RGB")
130
-
131
- max_dim = 128
132
- if image.width > max_dim or image.height > max_dim:
133
- ratio = min(max_dim / image.width, max_dim / image.height)
134
- new_size = (int(image.width * ratio), int(image.height * ratio))
135
- image = image.resize(new_size, Image.LANCZOS)
136
-
137
- buffer = io.BytesIO()
138
- image.save(buffer, format="PNG")
139
- buffer.seek(0)
140
- img_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
141
-
142
- if progress_callback:
143
- progress_callback(20.0, "Sending to SD x4 Upscaler...")
144
-
145
- payload = {
146
- "inputs": {
147
- "image": img_base64,
148
- "prompt": prompt,
149
- },
150
- "parameters": {
151
- "num_inference_steps": 75,
152
- "guidance_scale": 9.0,
153
- },
154
- "options": {
155
- "wait_for_model": True
156
- }
157
- }
158
-
159
- max_retries = 3
160
- retry_delay = 15.0
161
-
162
- for attempt in range(max_retries):
163
- try:
164
- if progress_callback:
165
- progress_callback(30.0 + attempt * 15, f"Processing with AI... (attempt {attempt + 1})")
166
-
167
- response = requests.post(url, headers=headers, json=payload, timeout=300)
168
-
169
- if response.status_code == 200:
170
- if progress_callback:
171
- progress_callback(90.0, "Finalizing upscaled image...")
172
- upscaled = Image.open(io.BytesIO(response.content))
173
- if progress_callback:
174
- progress_callback(100.0, "Upscaling complete!")
175
- return upscaled
176
- elif response.status_code == 503:
177
- if progress_callback:
178
- progress_callback(30.0 + attempt * 10, "Model loading, please wait...")
179
- time.sleep(retry_delay)
180
- continue
181
- elif response.status_code == 422:
182
- if progress_callback:
183
- progress_callback(40.0, "Using fallback upscaling method...")
184
- new_size = (image.width * scale, image.height * scale)
185
- return image.resize(new_size, Image.LANCZOS)
186
- else:
187
- error_info = response.json() if response.headers.get("content-type", "").startswith("application/json") else {"error": response.text}
188
- error_msg = error_info.get("error", str(error_info))
189
- if attempt < max_retries - 1:
190
- time.sleep(retry_delay)
191
- continue
192
- if progress_callback:
193
- progress_callback(50.0, "Using fallback method...")
194
- new_size = (image.width * scale, image.height * scale)
195
- return image.resize(new_size, Image.LANCZOS)
196
- except requests.exceptions.Timeout:
197
- if attempt < max_retries - 1:
198
- time.sleep(retry_delay)
199
- continue
200
- if progress_callback:
201
- progress_callback(50.0, "Timeout, using fallback...")
202
- new_size = (image.width * scale, image.height * scale)
203
- return image.resize(new_size, Image.LANCZOS)
204
-
205
- new_size = (image.width * scale, image.height * scale)
206
- return image.resize(new_size, Image.LANCZOS)
207
-
208
-
209
- def remove_background(
210
- image_bytes: bytes,
211
- progress_callback: Optional[Callable] = None
212
- ) -> Image.Image:
213
- model_id = MODELS["background_removal"]
214
- url = f"{HF_API_BASE}/{model_id}"
215
- headers = get_headers()
216
-
217
- if progress_callback:
218
- progress_callback(10.0, "Preparing image...")
219
-
220
- if progress_callback:
221
- progress_callback(20.0, "Sending to RMBG-1.4 model...")
222
-
223
- max_retries = 3
224
- retry_delay = 10.0
225
-
226
- for attempt in range(max_retries):
227
- try:
228
- if progress_callback:
229
- progress_callback(30.0 + attempt * 15, f"Processing... (attempt {attempt + 1})")
230
-
231
- response = requests.post(
232
- url,
233
- headers=headers,
234
- data=image_bytes,
235
- timeout=120
236
- )
237
-
238
- if response.status_code == 200:
239
- content_type = response.headers.get("content-type", "")
240
-
241
- if "image" in content_type:
242
- if progress_callback:
243
- progress_callback(90.0, "Processing mask...")
244
- mask = Image.open(io.BytesIO(response.content))
245
- original = Image.open(io.BytesIO(image_bytes))
246
-
247
- if original.mode != "RGBA":
248
- original = original.convert("RGBA")
249
-
250
- if mask.mode != "L":
251
- mask = mask.convert("L")
252
-
253
- if mask.size != original.size:
254
- mask = mask.resize(original.size, Image.LANCZOS)
255
-
256
- original.putalpha(mask)
257
-
258
- if progress_callback:
259
- progress_callback(100.0, "Background removed!")
260
- return original
261
- else:
262
- result = response.json()
263
- if isinstance(result, list) and len(result) > 0:
264
- if progress_callback:
265
- progress_callback(90.0, "Processing segmentation result...")
266
- original = Image.open(io.BytesIO(image_bytes))
267
- if original.mode != "RGBA":
268
- original = original.convert("RGBA")
269
- if progress_callback:
270
- progress_callback(100.0, "Background removed!")
271
- return original
272
- raise Exception(f"Unexpected response format: {result}")
273
- elif response.status_code == 503:
274
- if progress_callback:
275
- progress_callback(30.0 + attempt * 10, "Model loading...")
276
- time.sleep(retry_delay)
277
- continue
278
- else:
279
- error_msg = response.text
280
- try:
281
- error_msg = response.json().get("error", response.text)
282
- except:
283
- pass
284
- if attempt < max_retries - 1:
285
- time.sleep(retry_delay)
286
- continue
287
- raise Exception(f"API error: {error_msg}")
288
- except requests.exceptions.Timeout:
289
- if attempt < max_retries - 1:
290
- time.sleep(retry_delay)
291
- continue
292
- raise Exception("Request timed out")
293
-
294
- raise Exception("Failed to remove background after multiple retries")
295
-
296
-
297
- def denoise_image(
298
- image: Image.Image,
299
- strength: int = 10,
300
- progress_callback: Optional[Callable] = None
301
- ) -> Image.Image:
302
- if progress_callback:
303
- progress_callback(10.0, "Loading image...")
304
-
305
- if image.mode != "RGB":
306
- image = image.convert("RGB")
307
-
308
- if progress_callback:
309
- progress_callback(30.0, "Applying denoising filter...")
310
-
311
- try:
312
- import cv2
313
- import numpy as np
314
-
315
- if progress_callback:
316
- progress_callback(50.0, "Using OpenCV Non-Local Means...")
317
-
318
- img_array = np.array(image)
319
- img_bgr = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
320
-
321
- denoised_bgr = cv2.fastNlMeansDenoisingColored(
322
- img_bgr,
323
- None,
324
- h=strength,
325
- hForColorComponents=strength,
326
- templateWindowSize=7,
327
- searchWindowSize=21
328
- )
329
-
330
- if progress_callback:
331
- progress_callback(80.0, "Converting result...")
332
-
333
- denoised_rgb = cv2.cvtColor(denoised_bgr, cv2.COLOR_BGR2RGB)
334
- output_image = Image.fromarray(denoised_rgb)
335
-
336
- if progress_callback:
337
- progress_callback(100.0, "Denoising complete!")
338
-
339
- return output_image
340
- except ImportError:
341
- if progress_callback:
342
- progress_callback(50.0, "Using PIL fallback...")
343
- from PIL import ImageFilter
344
- output_image = image.filter(ImageFilter.SMOOTH_MORE)
345
- if progress_callback:
346
- progress_callback(100.0, "Denoising complete!")
347
- return output_image
348
-
349
-
350
- def get_model_info():
351
- return {
352
- "models": {
353
- "text_to_image": {
354
- "name": "Stable Diffusion XL",
355
- "model_id": MODELS["text_to_image"],
356
- "description": "State-of-the-art text-to-image generation with 1024x1024 native resolution",
357
- "provider": "HuggingFace Inference API",
358
- "source": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0"
359
- },
360
- "super_resolution": {
361
- "name": "Stable Diffusion x4 Upscaler",
362
- "model_id": MODELS["upscaler"],
363
- "description": "AI-powered 4x image upscaling with diffusion models",
364
- "provider": "HuggingFace Inference API",
365
- "source": "https://huggingface.co/stabilityai/stable-diffusion-x4-upscaler"
366
- },
367
- "background_removal": {
368
- "name": "RMBG-1.4",
369
- "model_id": MODELS["background_removal"],
370
- "description": "State-of-the-art background removal trained on diverse content",
371
- "provider": "HuggingFace Inference API",
372
- "source": "https://huggingface.co/briaai/RMBG-1.4"
373
- },
374
- "noise_reduction": {
375
- "name": "OpenCV Non-Local Means",
376
- "description": "Advanced noise reduction algorithm",
377
- "provider": "Local Processing",
378
- "source": "OpenCV"
379
- },
380
- "document_scanner": {
381
- "name": "AI Document Scanner",
382
- "description": "Edge detection, perspective correction, and enhancement",
383
- "features": ["auto-crop", "perspective transform", "CLAHE contrast", "bilateral denoising"],
384
- "provider": "Local Processing + HuggingFace",
385
- "source": "OpenCV + SD Upscaler"
386
- }
387
- },
388
- "api_provider": "HuggingFace Inference API",
389
- "supported_formats": ["png", "jpg", "jpeg", "webp", "bmp"],
390
- "authentication": "Bearer token required (HF_TOKEN)"
391
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
my_ssh_key.txt ADDED
File without changes
requirements.txt CHANGED
@@ -11,9 +11,3 @@ basicsr==1.4.2
11
  gfpgan==1.3.8
12
  rembg==2.0.50
13
  onnxruntime==1.17.0
14
- fastapi
15
- opencv-python
16
- pillow
17
- python-multipart
18
- requests
19
- uvicorn
 
11
  gfpgan==1.3.8
12
  rembg==2.0.50
13
  onnxruntime==1.17.0
 
 
 
 
 
 
templates/index.html CHANGED
@@ -43,17 +43,6 @@
43
  font-size: 1.1rem;
44
  }
45
 
46
- .powered-by {
47
- margin-top: 10px;
48
- color: #666;
49
- font-size: 0.9rem;
50
- }
51
-
52
- .powered-by a {
53
- color: #00d2ff;
54
- text-decoration: none;
55
- }
56
-
57
  .api-link {
58
  margin-top: 15px;
59
  }
@@ -125,7 +114,7 @@
125
  color: #888;
126
  }
127
 
128
- select, input[type="text"], textarea {
129
  width: 100%;
130
  padding: 12px;
131
  border-radius: 8px;
@@ -135,11 +124,6 @@
135
  font-size: 1rem;
136
  }
137
 
138
- textarea {
139
- resize: vertical;
140
- min-height: 80px;
141
- }
142
-
143
  .feature-tabs {
144
  display: flex;
145
  gap: 10px;
@@ -217,10 +201,6 @@
217
  gap: 20px;
218
  }
219
 
220
- .image-comparison.single {
221
- grid-template-columns: 1fr;
222
- }
223
-
224
  @media (max-width: 600px) {
225
  .image-comparison {
226
  grid-template-columns: 1fr;
@@ -375,22 +355,13 @@
375
  background-position: 0 0, 0 10px, 10px -10px, -10px 0px;
376
  background-color: #444;
377
  }
378
-
379
- .prompt-section {
380
- margin-bottom: 20px;
381
- }
382
-
383
- .prompt-section textarea {
384
- margin-bottom: 10px;
385
- }
386
  </style>
387
  </head>
388
  <body>
389
  <div class="container">
390
  <header>
391
  <h1>AI Image Processing</h1>
392
- <p class="subtitle">Generate, enhance, remove backgrounds, denoise, and scan documents with AI</p>
393
- <p class="powered-by">Powered by <a href="https://huggingface.co/inference-api" target="_blank">HuggingFace Inference API</a></p>
394
  <div class="api-link">
395
  <a href="/docs" target="_blank">View API Documentation</a>
396
  </div>
@@ -398,71 +369,20 @@
398
 
399
  <section class="upload-section">
400
  <div class="feature-tabs">
401
- <button class="feature-tab active" data-feature="generate">Generate</button>
402
- <button class="feature-tab" data-feature="enhance">Enhance</button>
403
  <button class="feature-tab" data-feature="remove-bg">Remove Background</button>
404
  <button class="feature-tab" data-feature="denoise">Denoise</button>
405
  <button class="feature-tab" data-feature="docscan">Doc Scan</button>
406
  </div>
407
 
408
- <div id="generateOptions" class="feature-options active">
409
- <div class="prompt-section">
410
- <label for="prompt">Describe the image you want to create</label>
411
- <textarea id="prompt" placeholder="A majestic lion in a savanna at sunset, photorealistic, 8k, detailed"></textarea>
412
- </div>
413
- <div class="prompt-section">
414
- <label for="negativePrompt">What to avoid (optional)</label>
415
- <textarea id="negativePrompt" placeholder="blurry, low quality, distorted, ugly"></textarea>
416
- </div>
417
- <div class="options">
418
- <div class="option-group">
419
- <label for="genWidth">Width</label>
420
- <select id="genWidth">
421
- <option value="512">512px</option>
422
- <option value="768">768px</option>
423
- <option value="1024" selected>1024px</option>
424
- </select>
425
- </div>
426
- <div class="option-group">
427
- <label for="genHeight">Height</label>
428
- <select id="genHeight">
429
- <option value="512">512px</option>
430
- <option value="768">768px</option>
431
- <option value="1024" selected>1024px</option>
432
- </select>
433
- </div>
434
- <div class="option-group">
435
- <label for="guidanceScale">Guidance Scale</label>
436
- <select id="guidanceScale">
437
- <option value="5">5 - More creative</option>
438
- <option value="7.5" selected>7.5 - Balanced</option>
439
- <option value="10">10 - Follow prompt closely</option>
440
- <option value="15">15 - Very strict</option>
441
- </select>
442
- </div>
443
- <div class="option-group">
444
- <label for="steps">Steps</label>
445
- <select id="steps">
446
- <option value="20">20 - Fast</option>
447
- <option value="30">30 - Quick</option>
448
- <option value="50" selected>50 - Balanced</option>
449
- <option value="75">75 - High quality</option>
450
- </select>
451
- </div>
452
- </div>
453
- <p style="color: #888; font-size: 0.85rem; margin-top: 10px;">
454
- Uses Stable Diffusion XL for high-quality image generation.
455
- </p>
456
- </div>
457
-
458
- <div id="uploadZone" class="drop-zone" style="display: none;">
459
  <div class="drop-zone-icon">📷</div>
460
  <p>Drag & drop an image here or click to select</p>
461
  <p><small>Supports: PNG, JPG, JPEG, WebP, BMP</small></p>
462
  </div>
463
  <input type="file" id="fileInput" accept="image/png,image/jpeg,image/jpg,image/webp,image/bmp">
464
 
465
- <div id="enhanceOptions" class="feature-options">
466
  <div class="options">
467
  <div class="option-group">
468
  <label for="scale">Upscale Factor</label>
@@ -472,9 +392,6 @@
472
  </select>
473
  </div>
474
  </div>
475
- <p style="color: #888; font-size: 0.85rem; margin-top: 10px;">
476
- Uses SD x4 Upscaler via HuggingFace for AI-powered upscaling.
477
- </p>
478
  </div>
479
 
480
  <div id="removeBgOptions" class="feature-options">
@@ -493,9 +410,6 @@
493
  <input type="text" id="customColor" placeholder="#FF0000" value="#FFFFFF">
494
  </div>
495
  </div>
496
- <p style="color: #888; font-size: 0.85rem; margin-top: 10px;">
497
- Uses RMBG-1.4 via HuggingFace for state-of-the-art background removal.
498
- </p>
499
  </div>
500
 
501
  <div id="denoiseOptions" class="feature-options">
@@ -527,7 +441,7 @@
527
  <div class="option-group">
528
  <label for="enhanceHd">AI HD Enhancement</label>
529
  <select id="enhanceHd">
530
- <option value="true" selected>Enabled (HuggingFace AI)</option>
531
  <option value="false">Disabled (faster)</option>
532
  </select>
533
  </div>
@@ -537,14 +451,14 @@
537
  </p>
538
  </div>
539
 
540
- <button class="process-btn" id="processBtn">Generate Image</button>
541
 
542
  <div class="error" id="error"></div>
543
  </section>
544
 
545
  <div class="loading" id="loading">
546
  <div class="spinner"></div>
547
- <p id="loadingText">Processing with AI...</p>
548
  <div class="progress-container">
549
  <div class="progress-percentage" id="progressPercentage">0%</div>
550
  <div class="progress-bar-wrapper">
@@ -556,8 +470,8 @@
556
  </div>
557
 
558
  <section class="results-section" id="results">
559
- <div class="image-comparison" id="imageComparison">
560
- <div class="image-box" id="originalBox">
561
  <h3>Original</h3>
562
  <img id="originalImg" src="" alt="Original image">
563
  </div>
@@ -572,17 +486,13 @@
572
  <section class="info-section">
573
  <h2>Available Features</h2>
574
  <div class="info-grid">
575
- <div class="info-item">
576
- <h4>Image Generation</h4>
577
- <p>Create images from text prompts using Stable Diffusion XL</p>
578
- </div>
579
  <div class="info-item">
580
  <h4>Image Enhancement</h4>
581
- <p>Upscale images 2x-4x using SD x4 Upscaler via HuggingFace</p>
582
  </div>
583
  <div class="info-item">
584
  <h4>Background Removal</h4>
585
- <p>Remove backgrounds using RMBG-1.4 via HuggingFace</p>
586
  </div>
587
  <div class="info-item">
588
  <h4>Noise Reduction</h4>
@@ -601,7 +511,7 @@
601
  </div>
602
 
603
  <script>
604
- const dropZone = document.getElementById('uploadZone');
605
  const fileInput = document.getElementById('fileInput');
606
  const processBtn = document.getElementById('processBtn');
607
  const loading = document.getElementById('loading');
@@ -612,9 +522,7 @@
612
  const processedImg = document.getElementById('processedImg');
613
  const downloadBtn = document.getElementById('downloadBtn');
614
  const resultBox = document.getElementById('resultBox');
615
- const originalBox = document.getElementById('originalBox');
616
  const resultLabel = document.getElementById('resultLabel');
617
- const imageComparison = document.getElementById('imageComparison');
618
  const progressBar = document.getElementById('progressBar');
619
  const progressPercentage = document.getElementById('progressPercentage');
620
  const progressMessage = document.getElementById('progressMessage');
@@ -624,7 +532,7 @@
624
  const customColorGroup = document.getElementById('customColorGroup');
625
 
626
  let selectedFile = null;
627
- let currentFeature = 'generate';
628
 
629
  featureTabs.forEach(tab => {
630
  tab.addEventListener('click', () => {
@@ -634,23 +542,14 @@
634
 
635
  document.querySelectorAll('.feature-options').forEach(opt => opt.classList.remove('active'));
636
 
637
- if (currentFeature === 'generate') {
638
- document.getElementById('generateOptions').classList.add('active');
639
- dropZone.style.display = 'none';
640
- processBtn.disabled = false;
641
- } else {
642
- dropZone.style.display = 'block';
643
- processBtn.disabled = !selectedFile;
644
-
645
- if (currentFeature === 'enhance') {
646
- document.getElementById('enhanceOptions').classList.add('active');
647
- } else if (currentFeature === 'remove-bg') {
648
- document.getElementById('removeBgOptions').classList.add('active');
649
- } else if (currentFeature === 'denoise') {
650
- document.getElementById('denoiseOptions').classList.add('active');
651
- } else if (currentFeature === 'docscan') {
652
- document.getElementById('docscanOptions').classList.add('active');
653
- }
654
  }
655
 
656
  updateButtonText();
@@ -663,7 +562,6 @@
663
 
664
  function updateButtonText() {
665
  const texts = {
666
- 'generate': 'Generate Image',
667
  'enhance': 'Enhance Image',
668
  'remove-bg': 'Remove Background',
669
  'denoise': 'Denoise Image',
@@ -705,9 +603,7 @@
705
  }
706
 
707
  selectedFile = file;
708
- if (currentFeature !== 'generate') {
709
- processBtn.disabled = false;
710
- }
711
  dropZone.innerHTML = `
712
  <div class="drop-zone-icon">✅</div>
713
  <p><strong>${file.name}</strong></p>
@@ -760,155 +656,138 @@
760
  const resultResponse = await fetch(resultUrl);
761
 
762
  if (resultResponse.status === 202) {
763
- await new Promise(r => setTimeout(r, 500));
764
  resultRetries++;
 
765
  continue;
766
  }
767
 
768
  if (!resultResponse.ok) {
769
- throw new Error('Failed to get result');
 
 
 
 
 
770
  }
771
 
772
  const blob = await resultResponse.blob();
773
  return URL.createObjectURL(blob);
774
  }
775
- throw new Error('Result not ready after completion');
 
 
776
  }
777
 
778
- if (data.status === 'failed') {
779
- throw new Error(data.error || 'Processing failed');
780
- }
781
-
782
- await new Promise(r => setTimeout(r, 1000));
783
- } catch (e) {
784
- throw e;
785
  }
786
  }
787
 
788
- throw new Error('Processing timed out');
789
  }
790
 
791
  processBtn.addEventListener('click', async () => {
792
- if (currentFeature !== 'generate' && !selectedFile) {
793
- showError('Please select an image first');
794
- return;
795
- }
796
-
797
- if (currentFeature === 'generate') {
798
- const prompt = document.getElementById('prompt').value.trim();
799
- if (!prompt) {
800
- showError('Please enter a prompt describing the image you want to create');
801
- return;
802
- }
803
- }
804
 
805
- hideError();
806
- results.classList.remove('show');
807
- loading.classList.add('show');
808
- processBtn.disabled = true;
809
- resetProgress();
810
 
811
- let endpoint = '';
812
- let formData = new FormData();
813
- let useAsync = true;
814
- let isGenerate = false;
815
 
816
- if (currentFeature === 'generate') {
817
- isGenerate = true;
818
- const prompt = document.getElementById('prompt').value.trim();
819
- const negativePrompt = document.getElementById('negativePrompt').value.trim();
820
- const width = document.getElementById('genWidth').value;
821
- const height = document.getElementById('genHeight').value;
822
- const guidanceScale = document.getElementById('guidanceScale').value;
823
- const steps = document.getElementById('steps').value;
824
-
825
- endpoint = `/generate/async?prompt=${encodeURIComponent(prompt)}&negative_prompt=${encodeURIComponent(negativePrompt)}&width=${width}&height=${height}&guidance_scale=${guidanceScale}&steps=${steps}`;
826
- loadingText.textContent = 'Generating image with Stable Diffusion XL...';
827
- } else if (currentFeature === 'enhance') {
828
  const scale = document.getElementById('scale').value;
829
- endpoint = `/enhance/async?scale=${scale}`;
830
- formData.append('file', selectedFile);
831
- loadingText.textContent = 'Enhancing with SD x4 Upscaler...';
832
  } else if (currentFeature === 'remove-bg') {
 
833
  let bgcolor = bgcolorSelect.value;
834
  if (bgcolor === 'custom') {
835
  bgcolor = document.getElementById('customColor').value;
836
  }
837
- endpoint = `/remove-background/async?bgcolor=${encodeURIComponent(bgcolor)}`;
838
- formData.append('file', selectedFile);
839
- loadingText.textContent = 'Removing background with RMBG-1.4...';
 
840
  } else if (currentFeature === 'denoise') {
 
841
  const strength = document.getElementById('strength').value;
842
- endpoint = `/denoise/async?strength=${strength}`;
843
- formData.append('file', selectedFile);
844
- loadingText.textContent = 'Denoising image...';
845
  } else if (currentFeature === 'docscan') {
 
846
  const docScale = document.getElementById('docScale').value;
847
  const enhanceHd = document.getElementById('enhanceHd').value;
848
- endpoint = `/docscan/async?scale=${docScale}&enhance_hd=${enhanceHd}`;
849
- formData.append('file', selectedFile);
850
- loadingText.textContent = 'Scanning document...';
 
 
 
 
 
851
  }
852
 
 
 
 
 
 
 
853
  try {
854
- let response;
855
- if (isGenerate) {
856
- response = await fetch(endpoint, { method: 'POST' });
857
- } else {
858
- response = await fetch(endpoint, {
859
- method: 'POST',
860
- body: formData
861
- });
862
- }
863
 
864
  if (!response.ok) {
865
- const err = await response.json();
866
- throw new Error(err.detail || 'Processing failed');
867
  }
868
 
869
- const data = await response.json();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
870
 
871
- if (data.job_id) {
872
- const imageUrl = await pollProgress(data.job_id, data.result_url);
873
- showResult(imageUrl, isGenerate);
874
- }
875
- } catch (e) {
876
- showError(e.message);
877
- } finally {
878
  loading.classList.remove('show');
879
- processBtn.disabled = currentFeature !== 'generate' && !selectedFile;
880
- }
881
- });
882
-
883
- function showResult(imageUrl, isGenerate = false) {
884
- processedImg.src = imageUrl;
885
- downloadBtn.href = imageUrl;
886
-
887
- const labels = {
888
- 'generate': 'Generated',
889
- 'enhance': 'Enhanced',
890
- 'remove-bg': 'Background Removed',
891
- 'denoise': 'Denoised',
892
- 'docscan': 'Scanned'
893
- };
894
- resultLabel.textContent = labels[currentFeature] || 'Processed';
895
-
896
- if (currentFeature === 'remove-bg') {
897
- resultBox.classList.add('checkerboard');
898
- } else {
899
- resultBox.classList.remove('checkerboard');
900
- }
901
-
902
- if (isGenerate) {
903
- originalBox.style.display = 'none';
904
- imageComparison.classList.add('single');
905
- } else {
906
- originalBox.style.display = 'block';
907
- imageComparison.classList.remove('single');
908
  }
909
 
910
- results.classList.add('show');
911
- }
912
 
913
  function showError(message) {
914
  error.textContent = message;
@@ -918,6 +797,8 @@
918
  function hideError() {
919
  error.classList.remove('show');
920
  }
 
 
921
  </script>
922
  </body>
923
  </html>
 
43
  font-size: 1.1rem;
44
  }
45
 
 
 
 
 
 
 
 
 
 
 
 
46
  .api-link {
47
  margin-top: 15px;
48
  }
 
114
  color: #888;
115
  }
116
 
117
+ select, input[type="text"] {
118
  width: 100%;
119
  padding: 12px;
120
  border-radius: 8px;
 
124
  font-size: 1rem;
125
  }
126
 
 
 
 
 
 
127
  .feature-tabs {
128
  display: flex;
129
  gap: 10px;
 
201
  gap: 20px;
202
  }
203
 
 
 
 
 
204
  @media (max-width: 600px) {
205
  .image-comparison {
206
  grid-template-columns: 1fr;
 
355
  background-position: 0 0, 0 10px, 10px -10px, -10px 0px;
356
  background-color: #444;
357
  }
 
 
 
 
 
 
 
 
358
  </style>
359
  </head>
360
  <body>
361
  <div class="container">
362
  <header>
363
  <h1>AI Image Processing</h1>
364
+ <p class="subtitle">Enhance, remove backgrounds, denoise, and scan documents with AI</p>
 
365
  <div class="api-link">
366
  <a href="/docs" target="_blank">View API Documentation</a>
367
  </div>
 
369
 
370
  <section class="upload-section">
371
  <div class="feature-tabs">
372
+ <button class="feature-tab active" data-feature="enhance">Enhance</button>
 
373
  <button class="feature-tab" data-feature="remove-bg">Remove Background</button>
374
  <button class="feature-tab" data-feature="denoise">Denoise</button>
375
  <button class="feature-tab" data-feature="docscan">Doc Scan</button>
376
  </div>
377
 
378
+ <div class="drop-zone" id="dropZone">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  <div class="drop-zone-icon">📷</div>
380
  <p>Drag & drop an image here or click to select</p>
381
  <p><small>Supports: PNG, JPG, JPEG, WebP, BMP</small></p>
382
  </div>
383
  <input type="file" id="fileInput" accept="image/png,image/jpeg,image/jpg,image/webp,image/bmp">
384
 
385
+ <div id="enhanceOptions" class="feature-options active">
386
  <div class="options">
387
  <div class="option-group">
388
  <label for="scale">Upscale Factor</label>
 
392
  </select>
393
  </div>
394
  </div>
 
 
 
395
  </div>
396
 
397
  <div id="removeBgOptions" class="feature-options">
 
410
  <input type="text" id="customColor" placeholder="#FF0000" value="#FFFFFF">
411
  </div>
412
  </div>
 
 
 
413
  </div>
414
 
415
  <div id="denoiseOptions" class="feature-options">
 
441
  <div class="option-group">
442
  <label for="enhanceHd">AI HD Enhancement</label>
443
  <select id="enhanceHd">
444
+ <option value="true" selected>Enabled (Real-ESRGAN)</option>
445
  <option value="false">Disabled (faster)</option>
446
  </select>
447
  </div>
 
451
  </p>
452
  </div>
453
 
454
+ <button class="process-btn" id="processBtn" disabled>Process Image</button>
455
 
456
  <div class="error" id="error"></div>
457
  </section>
458
 
459
  <div class="loading" id="loading">
460
  <div class="spinner"></div>
461
+ <p id="loadingText">Processing your image with AI...</p>
462
  <div class="progress-container">
463
  <div class="progress-percentage" id="progressPercentage">0%</div>
464
  <div class="progress-bar-wrapper">
 
470
  </div>
471
 
472
  <section class="results-section" id="results">
473
+ <div class="image-comparison">
474
+ <div class="image-box">
475
  <h3>Original</h3>
476
  <img id="originalImg" src="" alt="Original image">
477
  </div>
 
486
  <section class="info-section">
487
  <h2>Available Features</h2>
488
  <div class="info-grid">
 
 
 
 
489
  <div class="info-item">
490
  <h4>Image Enhancement</h4>
491
+ <p>Upscale images 2x-4x using Real-ESRGAN AI model</p>
492
  </div>
493
  <div class="info-item">
494
  <h4>Background Removal</h4>
495
+ <p>Remove backgrounds using BiRefNet deep learning model</p>
496
  </div>
497
  <div class="info-item">
498
  <h4>Noise Reduction</h4>
 
511
  </div>
512
 
513
  <script>
514
+ const dropZone = document.getElementById('dropZone');
515
  const fileInput = document.getElementById('fileInput');
516
  const processBtn = document.getElementById('processBtn');
517
  const loading = document.getElementById('loading');
 
522
  const processedImg = document.getElementById('processedImg');
523
  const downloadBtn = document.getElementById('downloadBtn');
524
  const resultBox = document.getElementById('resultBox');
 
525
  const resultLabel = document.getElementById('resultLabel');
 
526
  const progressBar = document.getElementById('progressBar');
527
  const progressPercentage = document.getElementById('progressPercentage');
528
  const progressMessage = document.getElementById('progressMessage');
 
532
  const customColorGroup = document.getElementById('customColorGroup');
533
 
534
  let selectedFile = null;
535
+ let currentFeature = 'enhance';
536
 
537
  featureTabs.forEach(tab => {
538
  tab.addEventListener('click', () => {
 
542
 
543
  document.querySelectorAll('.feature-options').forEach(opt => opt.classList.remove('active'));
544
 
545
+ if (currentFeature === 'enhance') {
546
+ document.getElementById('enhanceOptions').classList.add('active');
547
+ } else if (currentFeature === 'remove-bg') {
548
+ document.getElementById('removeBgOptions').classList.add('active');
549
+ } else if (currentFeature === 'denoise') {
550
+ document.getElementById('denoiseOptions').classList.add('active');
551
+ } else if (currentFeature === 'docscan') {
552
+ document.getElementById('docscanOptions').classList.add('active');
 
 
 
 
 
 
 
 
 
553
  }
554
 
555
  updateButtonText();
 
562
 
563
  function updateButtonText() {
564
  const texts = {
 
565
  'enhance': 'Enhance Image',
566
  'remove-bg': 'Remove Background',
567
  'denoise': 'Denoise Image',
 
603
  }
604
 
605
  selectedFile = file;
606
+ processBtn.disabled = false;
 
 
607
  dropZone.innerHTML = `
608
  <div class="drop-zone-icon">✅</div>
609
  <p><strong>${file.name}</strong></p>
 
656
  const resultResponse = await fetch(resultUrl);
657
 
658
  if (resultResponse.status === 202) {
 
659
  resultRetries++;
660
+ await new Promise(resolve => setTimeout(resolve, 1000));
661
  continue;
662
  }
663
 
664
  if (!resultResponse.ok) {
665
+ let errorMessage = 'Failed to get result';
666
+ try {
667
+ const errorData = await resultResponse.json();
668
+ errorMessage = errorData.detail || errorMessage;
669
+ } catch (e) {}
670
+ throw new Error(errorMessage);
671
  }
672
 
673
  const blob = await resultResponse.blob();
674
  return URL.createObjectURL(blob);
675
  }
676
+ throw new Error('Timed out waiting for result');
677
+ } else if (data.status === 'failed') {
678
+ throw new Error(data.error || data.message || 'Processing failed');
679
  }
680
 
681
+ await new Promise(resolve => setTimeout(resolve, 500));
682
+ } catch (err) {
683
+ throw err;
 
 
 
 
684
  }
685
  }
686
 
687
+ throw new Error('Timed out waiting for processing to complete');
688
  }
689
 
690
  processBtn.addEventListener('click', async () => {
691
+ if (!selectedFile) return;
 
 
 
 
 
 
 
 
 
 
 
692
 
693
+ const formData = new FormData();
694
+ formData.append('file', selectedFile);
 
 
 
695
 
696
+ let endpoint = '/enhance/async';
697
+ let params = new URLSearchParams();
 
 
698
 
699
+ if (currentFeature === 'enhance') {
700
+ endpoint = '/enhance/async';
 
 
 
 
 
 
 
 
 
 
701
  const scale = document.getElementById('scale').value;
702
+ params.append('scale', scale);
703
+ loadingText.textContent = 'Enhancing your image with AI...';
704
+ resultLabel.textContent = 'Enhanced';
705
  } else if (currentFeature === 'remove-bg') {
706
+ endpoint = '/remove-background/async';
707
  let bgcolor = bgcolorSelect.value;
708
  if (bgcolor === 'custom') {
709
  bgcolor = document.getElementById('customColor').value;
710
  }
711
+ params.append('bgcolor', bgcolor);
712
+ loadingText.textContent = 'Removing background with AI...';
713
+ resultLabel.textContent = 'Background Removed';
714
+ resultBox.classList.add('checkerboard');
715
  } else if (currentFeature === 'denoise') {
716
+ endpoint = '/denoise/async';
717
  const strength = document.getElementById('strength').value;
718
+ params.append('strength', strength);
719
+ loadingText.textContent = 'Reducing noise in your image...';
720
+ resultLabel.textContent = 'Denoised';
721
  } else if (currentFeature === 'docscan') {
722
+ endpoint = '/docscan/async';
723
  const docScale = document.getElementById('docScale').value;
724
  const enhanceHd = document.getElementById('enhanceHd').value;
725
+ params.append('scale', docScale);
726
+ params.append('enhance_hd', enhanceHd);
727
+ loadingText.textContent = 'Scanning and enhancing document...';
728
+ resultLabel.textContent = 'Scanned Document';
729
+ }
730
+
731
+ if (currentFeature !== 'remove-bg') {
732
+ resultBox.classList.remove('checkerboard');
733
  }
734
 
735
+ loading.classList.add('show');
736
+ results.classList.remove('show');
737
+ processBtn.disabled = true;
738
+ hideError();
739
+ resetProgress();
740
+
741
  try {
742
+ const response = await fetch(`${endpoint}?${params.toString()}`, {
743
+ method: 'POST',
744
+ body: formData
745
+ });
 
 
 
 
 
746
 
747
  if (!response.ok) {
748
+ const errorData = await response.json();
749
+ throw new Error(errorData.detail || 'Processing failed');
750
  }
751
 
752
+ const jobData = await response.json();
753
+ const jobId = jobData.job_id;
754
+ const resultUrl = jobData.result_url;
755
+
756
+ updateProgress(5, 'Job started...');
757
+
758
+ const imageUrl = await pollProgress(jobId, resultUrl);
759
+
760
+ updateProgress(100, 'Loading result image...');
761
+
762
+ await new Promise((resolve, reject) => {
763
+ processedImg.onload = resolve;
764
+ processedImg.onerror = () => reject(new Error('Failed to load result image'));
765
+ processedImg.src = imageUrl;
766
+
767
+ setTimeout(() => resolve(), 10000);
768
+ });
769
+
770
+ downloadBtn.href = imageUrl;
771
+
772
+ const filenames = {
773
+ 'enhance': 'enhanced',
774
+ 'remove-bg': 'nobg',
775
+ 'denoise': 'denoised',
776
+ 'docscan': 'scanned'
777
+ };
778
+ const filename = filenames[currentFeature] || 'processed';
779
+ downloadBtn.download = `${filename}_${selectedFile.name.split('.')[0]}.png`;
780
 
 
 
 
 
 
 
 
781
  loading.classList.remove('show');
782
+ results.classList.add('show');
783
+
784
+ } catch (err) {
785
+ showError(err.message);
786
+ loading.classList.remove('show');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
787
  }
788
 
789
+ processBtn.disabled = false;
790
+ });
791
 
792
  function showError(message) {
793
  error.textContent = message;
 
797
  function hideError() {
798
  error.classList.remove('show');
799
  }
800
+
801
+ updateButtonText();
802
  </script>
803
  </body>
804
  </html>