import base64
import hashlib
import hmac
import io
import json
import logging
import os
import tempfile
import uuid
from typing import Optional

from fastapi import FastAPI, Depends, HTTPException, Header, Response
from huggingface_hub import (
    batch_bucket_files,
    download_bucket_files,
    list_bucket_tree,
)
from PIL import Image
from pydantic import BaseModel
|
|
|
# Pixel-count threshold for Pillow's decompression-bomb guard, applied
# process-wide to every image opened through PIL in this module.
Image.MAX_IMAGE_PIXELS = 25_000_000

# Bucket that every endpoint in this module reads from and writes to.
BUCKET_ID = "superhumania/lightweight"

# Both secrets are mandatory: a missing variable raises KeyError at import
# time, so the service refuses to start without them.
HF_TOKEN = os.environ["HF_TOKEN"]
PROXY_SECRET = os.environ["HF_PROXY_SECRET"]
|
|
|
| MAGIC_BYTES = {
|
| b"\x89PNG": "image/png",
|
| b"\xff\xd8\xff": "image/jpeg",
|
| b"RIFF": "image/webp",
|
| b"GIF8": "image/gif",
|
| }
|
|
|
|
|
| def _validate_magic_bytes(raw: bytes) -> bool:
|
| """Check that raw bytes start with a known image magic signature."""
|
| for sig in MAGIC_BYTES:
|
| if raw[: len(sig)] == sig:
|
| return True
|
| return False
|
|
|
# ASGI application object; the route decorators in this module register
# their handlers on it.
app = FastAPI()
|
|
|
|
|
|
|
|
|
|
|
|
|
async def verify_token(authorization: str = Header(...)):
    """Verify that the Bearer token matches HF_PROXY_SECRET.

    Used as a FastAPI dependency on the authenticated routes.

    Raises:
        HTTPException: 401 when the ``Authorization`` header does not use the
            ``Bearer`` scheme or carries a token different from PROXY_SECRET.
    """
    if not authorization.startswith("Bearer "):
        raise HTTPException(401, "Missing bearer token")

    # Compare as bytes in constant time: a plain ``!=`` returns as soon as the
    # first differing character is found, which leaks timing information, and
    # compare_digest on str rejects non-ASCII input.
    supplied = authorization[7:].encode("utf-8")
    expected = PROXY_SECRET.encode("utf-8")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(401, "Invalid token")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class UploadRequest(BaseModel):
    """Body of POST /upload: one text file to store in the bucket."""

    # Destination path inside the bucket.
    path: str
    # File content as text; the handler encodes it as UTF-8 before upload.
    content: str
    # Optional caller-supplied hash used to skip uploads of unchanged content.
    content_hash: Optional[str] = None
|
|
|
|
|
class DownloadRequest(BaseModel):
    """Body of POST /download: the bucket file to fetch."""

    # Path of the file inside the bucket.
    path: str
|
|
|
|
|
class BatchOperation(BaseModel):
    """A single entry of a POST /batch request."""

    # Operation kind; the batch handler only acts on "upload".
    action: str
    # Destination path inside the bucket.
    path: str
    # File content as text; encoded as UTF-8 before upload.
    content: str
    # Optional caller-supplied hash used to skip unchanged content.
    content_hash: Optional[str] = None
|
|
|
|
|
class BatchRequest(BaseModel):
    """Body of POST /batch: an ordered list of operations."""

    # Operations are processed in the order given.
    operations: list[BatchOperation]
|
|
|
|
|
class ListRequest(BaseModel):
    """Body of POST /list: which part of the bucket to enumerate."""

    # Bucket path prefix passed to the recursive tree listing.
    prefix: str
|
|
|
|
|
class DeleteRequest(BaseModel):
    """Body of POST /delete: the bucket files to remove."""

    # Bucket paths to delete in a single batch call.
    paths: list[str]
|
|
|
|
|
class ImageRequest(BaseModel):
    """Body of POST /image: one base64-encoded image to store."""

    # Session identifier; becomes a directory component of the stored path.
    session_id: str
    # Image bytes, base64-encoded.
    image_data: str
    # Media type as declared by the client.
    media_type: str
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| def _hash_index_path(prefix: str) -> str:
|
| """Return the bucket path for a hash index file."""
|
| return f"_hashes/{prefix}.json"
|
|
|
|
|
def _load_hash_index(prefix: str) -> dict:
    """Download and parse the hash index for *prefix* from the bucket.

    The index is downloaded into a temporary file, which is always removed
    afterwards.

    Returns:
        The parsed index (a dict mapping bucket path to content hash), or an
        empty dict when the index cannot be downloaded or parsed, or when the
        stored JSON is not an object. Failures are logged rather than raised
        so that a missing index (the normal state before the first hashed
        upload) does not break callers.
    """
    bucket_path = _hash_index_path(prefix)
    log = logging.getLogger(__name__)
    tmp = None
    try:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".json")
        tmp.close()
        download_bucket_files(
            BUCKET_ID,
            files=[(bucket_path, tmp.name)],
            token=HF_TOKEN,
        )
        with open(tmp.name, "r", encoding="utf-8") as f:
            index = json.load(f)
    except Exception as exc:
        # Best-effort: a missing index and a transient failure cannot be told
        # apart here, so record the reason instead of swallowing it silently.
        log.warning("Hash index %s unavailable, using empty index: %s", bucket_path, exc)
        return {}
    finally:
        if tmp is not None and os.path.exists(tmp.name):
            os.unlink(tmp.name)

    if not isinstance(index, dict):
        # Callers use .get() and item assignment; anything but an object
        # would crash them later.
        log.warning("Hash index %s is not a JSON object, ignoring it", bucket_path)
        return {}
    return index
|
|
|
|
|
def _save_hash_index(prefix: str, index: dict) -> None:
    """Serialize *index* to compact JSON and upload it to the bucket.

    Saving is best-effort: an upload failure is logged and otherwise ignored,
    so callers never fail because the index could not be written. The only
    consequence of a lost write is that a later identical upload is not
    skipped.
    """
    bucket_path = _hash_index_path(prefix)
    data = json.dumps(index, separators=(",", ":")).encode("utf-8")
    try:
        batch_bucket_files(
            BUCKET_ID,
            add=[(data, bucket_path)],
            token=HF_TOKEN,
        )
    except Exception:
        # Keep the deliberate best-effort behavior, but leave a trace.
        logging.getLogger(__name__).warning(
            "Failed to save hash index %s", bucket_path, exc_info=True
        )
|
|
|
|
|
| def _get_prefix(path: str) -> str:
|
| """Extract the first path segment (e.g. 'sessions' from 'sessions/u/123/file')."""
|
| return path.split("/")[0] if "/" in path else "default"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Unauthenticated health probe reporting the configured bucket id."""
    payload = {"status": "ok", "bucket": BUCKET_ID}
    return payload
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/upload", dependencies=[Depends(verify_token)])
async def upload_file(request: UploadRequest):
    """Store one text file in the bucket, skipping unchanged content.

    When the request carries a ``content_hash`` and the hash index of the
    path's prefix already records that hash for the path, nothing is uploaded
    and ``skipped`` is True. Otherwise the content is uploaded as UTF-8 and,
    if a hash was supplied, the index is reloaded, updated and saved.

    Raises:
        HTTPException: 500 when the bucket upload fails.
    """
    target = request.path
    digest = request.content_hash
    index_prefix = _get_prefix(target)

    # Short-circuit when the index says this exact content is already stored.
    if digest and _load_hash_index(index_prefix).get(target) == digest:
        return {"ok": True, "path": target, "skipped": True}

    try:
        batch_bucket_files(
            BUCKET_ID,
            add=[(request.content.encode("utf-8"), target)],
            token=HF_TOKEN,
        )
    except Exception as e:
        raise HTTPException(500, f"Upload failed: {e}")

    # Record the new hash against a freshly loaded copy of the index.
    if digest:
        refreshed = _load_hash_index(index_prefix)
        refreshed[target] = digest
        _save_hash_index(index_prefix, refreshed)

    return {"ok": True, "path": target, "skipped": False}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/download", dependencies=[Depends(verify_token)])
async def download_file(request: DownloadRequest):
    """Fetch one bucket file and return its content as UTF-8 text.

    The file is downloaded into a temporary file that is always removed
    before the handler returns.

    Raises:
        HTTPException: 404 for any failure while creating the temporary
            file, downloading, or decoding the content.
    """
    scratch_path = None
    try:
        # delete=False: the path must outlive the handle so the download can
        # write to it; cleanup happens in the finally clause.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".tmp") as handle:
            scratch_path = handle.name

        download_bucket_files(
            BUCKET_ID,
            files=[(request.path, scratch_path)],
            token=HF_TOKEN,
        )

        with open(scratch_path, "r", encoding="utf-8") as fh:
            text = fh.read()
    except Exception as e:
        raise HTTPException(404, f"Download failed: {e}")
    finally:
        if scratch_path is not None and os.path.exists(scratch_path):
            os.unlink(scratch_path)

    return {"ok": True, "content": text}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/batch", dependencies=[Depends(verify_token)])
async def batch_operations(request: BatchRequest):
    """Upload several text files in one bucket call, skipping unchanged ones.

    Only operations whose ``action`` is ``"upload"`` are processed; others are
    ignored. An operation with a ``content_hash`` is skipped when that hash is
    already current for its path, either in the stored hash index or from an
    earlier operation in this same batch. When a path occurs more than once,
    only the last queued write is sent, so the bucket and the hash index
    always agree on the final state.

    Returns:
        ``uploaded`` is the number of distinct paths written and ``skipped``
        the number of operations dropped because their hash was current.

    Raises:
        HTTPException: 500 when the bucket upload fails.
    """
    # path -> (encoded content, hash or None) of the last write queued for it.
    queued: dict[str, tuple[bytes, Optional[str]]] = {}
    # Hash indexes loaded so far, keyed by prefix (one download per prefix).
    prefix_indexes: dict[str, dict] = {}
    skipped_count = 0

    for op in request.operations:
        if op.action != "upload":
            continue

        if op.content_hash:
            prefix = _get_prefix(op.path)
            if prefix not in prefix_indexes:
                prefix_indexes[prefix] = _load_hash_index(prefix)
            # A write queued earlier in this batch supersedes the stored
            # index entry when deciding whether the content is current.
            if op.path in queued:
                current_hash = queued[op.path][1]
            else:
                current_hash = prefix_indexes[prefix].get(op.path)
            if current_hash == op.content_hash:
                skipped_count += 1
                continue

        queued[op.path] = (op.content.encode("utf-8"), op.content_hash or None)

    add_files = [(content, path) for path, (content, _) in queued.items()]

    if add_files:
        try:
            batch_bucket_files(
                BUCKET_ID,
                add=add_files,
                token=HF_TOKEN,
            )
        except Exception as e:
            raise HTTPException(500, f"Batch upload failed: {e}") from e

    # Bring the indexes in line with what was actually written.
    updated_prefixes: set[str] = set()
    for path, (_, content_hash) in queued.items():
        prefix = _get_prefix(path)
        index = prefix_indexes.get(prefix)
        if index is None:
            # No hashed operation touched this prefix; its index was never
            # loaded and is left as it is.
            continue
        if content_hash is None:
            # Content changed without a hash: a recorded hash is now stale.
            if path in index:
                del index[path]
                updated_prefixes.add(prefix)
        elif index.get(path) != content_hash:
            index[path] = content_hash
            updated_prefixes.add(prefix)

    for prefix in updated_prefixes:
        _save_hash_index(prefix, prefix_indexes[prefix])

    return {"ok": True, "uploaded": len(add_files), "skipped": skipped_count}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/list", dependencies=[Depends(verify_token)])
async def list_files(request: ListRequest):
    """List every file below a bucket prefix, recursively.

    Returns:
        ``files`` holds one ``{"path", "size"}`` dict per entry whose type is
        ``"file"``; other entry types are left out.

    Raises:
        HTTPException: 500 when the listing fails.
    """
    try:
        entries = list_bucket_tree(
            BUCKET_ID,
            prefix=request.prefix,
            recursive=True,
            token=HF_TOKEN,
        )
        listing = []
        for entry in entries:
            if entry.type == "file":
                listing.append({"path": entry.path, "size": entry.size})
        return {"ok": True, "files": listing}
    except Exception as e:
        raise HTTPException(500, f"List failed: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/delete", dependencies=[Depends(verify_token)])
async def delete_files(request: DeleteRequest):
    """Delete files from the bucket and drop their hash-index entries.

    After a successful delete, the hash index of each affected prefix is
    pruned. Without this, a later upload of the same content with the same
    ``content_hash`` would be skipped as "unchanged" and the file would stay
    missing. Index pruning is best-effort and never fails the request.

    Returns:
        ``deleted`` is the number of paths submitted for deletion.

    Raises:
        HTTPException: 500 when the bucket delete fails.
    """
    try:
        batch_bucket_files(
            BUCKET_ID,
            delete=request.paths,
            token=HF_TOKEN,
        )
    except Exception as e:
        raise HTTPException(500, f"Delete failed: {e}") from e

    # Group by prefix so each index is downloaded at most once.
    paths_by_prefix: dict[str, list[str]] = {}
    for path in request.paths:
        paths_by_prefix.setdefault(_get_prefix(path), []).append(path)

    for prefix, paths in paths_by_prefix.items():
        index = _load_hash_index(prefix)
        stale = [path for path in paths if path in index]
        if not stale:
            # Nothing recorded for these paths; avoid a pointless rewrite.
            continue
        for path in stale:
            del index[path]
        _save_hash_index(prefix, index)

    return {"ok": True, "deleted": len(request.paths)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/image", dependencies=[Depends(verify_token)])
async def upload_image(request: ImageRequest):
    """Accept a base64 image, validate it, strip EXIF, convert to WebP, store it.

    The image is decoded, checked against the 20 MB limit and the known magic
    signatures, converted to RGB unless it is already RGB/RGBA, shrunk to fit
    within 1920x1920, and saved as WebP (quality 80, empty EXIF) under
    ``images/<session_id>/<uuid>.webp``.

    Returns:
        The relative ``serve_url`` of the stored image, its final width and
        height, and the stored and original sizes in bytes.

    Raises:
        HTTPException: 400 for an unsafe ``session_id``, invalid base64,
            oversized data, an unsupported signature, or an image Pillow
            cannot decode or convert; 500 when the bucket upload fails.
    """
    # session_id becomes part of the bucket path: refuse empty, '.', '..'
    # segments and backslashes so it cannot escape the images/ tree.
    session_parts = request.session_id.split("/")
    if "\\" in request.session_id or any(
        part in ("", ".", "..") for part in session_parts
    ):
        raise HTTPException(400, "Invalid session_id")

    try:
        raw = base64.b64decode(request.image_data)
    except ValueError as e:
        # binascii.Error (bad padding etc.) is a ValueError subclass.
        raise HTTPException(400, f"Invalid base64 image data: {e}") from e

    if len(raw) > 20_000_000:
        raise HTTPException(400, "Image too large: max 20MB")

    if not _validate_magic_bytes(raw):
        raise HTTPException(
            400, "Invalid image format: only PNG, JPEG, WebP, GIF allowed"
        )

    original_size_bytes = len(raw)

    # Image.open only parses the header; pixel data is decoded lazily by
    # convert/thumbnail/save, so truncated or corrupt files fail there. Keep
    # the whole pipeline inside the try so those surface as 400, not 500.
    try:
        img = Image.open(io.BytesIO(raw))

        img.info.pop("exif", None)

        if img.mode not in ("RGB", "RGBA"):
            img = img.convert("RGB")

        # In-place resize that preserves aspect ratio and never enlarges.
        img.thumbnail((1920, 1920), Image.LANCZOS)

        buf = io.BytesIO()
        img.save(buf, format="WEBP", quality=80, exif=b"")
    except Exception as e:
        raise HTTPException(400, f"Cannot decode image: {e}") from e

    webp_bytes = buf.getvalue()
    final_w, final_h = img.size

    filename = f"images/{request.session_id}/{uuid.uuid4()}.webp"

    try:
        batch_bucket_files(BUCKET_ID, add=[(webp_bytes, filename)], token=HF_TOKEN)
    except Exception as e:
        raise HTTPException(500, f"Bucket upload failed: {e}") from e

    serve_url = f"/image/{filename}"
    return {
        "ok": True,
        "serve_url": serve_url,
        "width": final_w,
        "height": final_h,
        "size_bytes": len(webp_bytes),
        "original_size_bytes": original_size_bytes,
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/image/{path:path}")
async def serve_image(path: str):
    """Serve raw image bytes from the bucket. Public, no auth required.

    Because this route is unauthenticated, only paths under ``images/`` (where
    the image upload endpoint stores its output) are served; anything else,
    including hash indexes and other stored files, is reported as not found.
    The content type is chosen from the file extension.

    Raises:
        HTTPException: 404 when the path is outside ``images/``, contains
            empty, '.' or '..' segments, or the download fails.
    """
    segments = path.split("/")
    if (
        len(segments) < 2
        or segments[0] != "images"
        or any(segment in ("", ".", "..") for segment in segments)
    ):
        # Same response as a missing file, so nothing is revealed about
        # which other bucket paths exist.
        raise HTTPException(404, "Image not found")

    tmp_path = None
    try:
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".img")
        tmp_path = tmp.name
        tmp.close()

        download_bucket_files(
            BUCKET_ID, files=[(path, tmp_path)], token=HF_TOKEN
        )

        with open(tmp_path, "rb") as f:
            file_bytes = f.read()

        if path.endswith(".webp"):
            content_type = "image/webp"
        elif path.endswith(".png"):
            content_type = "image/png"
        elif path.endswith((".jpg", ".jpeg")):
            content_type = "image/jpeg"
        elif path.endswith(".gif"):
            content_type = "image/gif"
        else:
            content_type = "application/octet-stream"

        return Response(
            content=file_bytes,
            media_type=content_type,
            headers={"Cache-Control": "public, max-age=86400"},
        )
    except Exception as e:
        raise HTTPException(404, "Image not found") from e
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
|
|