brightening-eyes committed
Commit 835c41f · 1 Parent(s): 1b312ca

initial commit

Files changed (4)
  1. Dockerfile +17 -0
  2. main.py +304 -0
  3. requirements.txt +13 -0
  4. templates/index.html +92 -0
Dockerfile ADDED
@@ -0,0 +1,17 @@
FROM python:3.10.5

# Set the working directory to /code
WORKDIR /code

# Copy the current directory contents into the container at /code
COPY . .

# Upgrade pip
RUN python -m pip install --upgrade pip

# Install requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

# Start the FastAPI app on port 7860, the default port expected by Spaces
ENTRYPOINT ["uvicorn"]
CMD ["main:app", "--host", "0.0.0.0", "--port", "7860"]
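
# A quick local smoke test (assumption: Spaces builds and runs this image
# automatically, so these commands are only for local development):
#   docker build -t cloudzy-ai .
#   docker run -p 7860:7860 cloudzy-ai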
main.py ADDED
@@ -0,0 +1,304 @@
import os
import uuid
import shutil
import sqlite3
import json
import logging
import numpy as np
import chromadb
import cv2
from datetime import datetime
from typing import List, Optional
from contextlib import asynccontextmanager

# FastAPI & Utilities
from fastapi import FastAPI, UploadFile, File, BackgroundTasks, HTTPException, Request
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel

# AI Libraries
import torch
from PIL import Image
from sentence_transformers import SentenceTransformer
from transformers import BlipProcessor, BlipForConditionalGeneration
from insightface.app import FaceAnalysis

# --- CONFIGURATION ---
UPLOAD_DIR = "static/uploads"
DB_PATH = "photos.db"
CHROMA_PATH = "chroma_db"
device = "cuda" if torch.cuda.is_available() else "cpu"

# Ensure directories exist
os.makedirs(UPLOAD_DIR, exist_ok=True)

# --- LOGGING ---
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("CloudzyAI")

# --- GLOBAL MODELS (Loaded on Startup) ---
ai_models = {}

@asynccontextmanager
async def lifespan(app: FastAPI):
    # 1. Load CLIP for Semantic Search (Text <-> Image)
    logger.info("Loading CLIP model...")
    ai_models["clip"] = SentenceTransformer('clip-ViT-B-32', device=device)

    # 2. Load BLIP for Captioning
    logger.info("Loading BLIP model...")
    ai_models["blip_processor"] = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    ai_models["blip_model"] = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)

    # 3. Load InsightFace for Smart Analysis
    logger.info("Loading InsightFace model...")
    # 'buffalo_l' is a good default model pack. It downloads automatically on first run.
    app_face = FaceAnalysis(name='buffalo_l', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app_face.prepare(ctx_id=0, det_size=(640, 640))
    ai_models["face"] = app_face

    # 4. Initialize Database
    init_db()

    yield
    logger.info("Shutting down...")

app = FastAPI(lifespan=lifespan)
app.mount("/static", StaticFiles(directory="static"), name="static")
templates = Jinja2Templates(directory="templates")

# --- DATABASE SETUP (SQLite + ChromaDB) ---
chroma_client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = chroma_client.get_or_create_collection(name="photo_embeddings")

def init_db():
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    cursor.execute("""
        CREATE TABLE IF NOT EXISTS photos (
            id TEXT PRIMARY KEY,
            filename TEXT,
            filepath TEXT,
            upload_date TEXT,
            caption TEXT,
            tags TEXT,
            smart_analysis TEXT,
            status TEXT
        )
    """)
    conn.commit()
    conn.close()

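# Design note: SQLite holds each photo's metadata while ChromaDB holds its CLIP
# image vector under the same id, so a nearest-neighbour hit in /search can be
# joined back to its SQLite row.
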
# --- Pydantic Models ---
class PhotoResponse(BaseModel):
    id: str
    filename: str
    url: str
    caption: Optional[str] = None
    tags: List[str] = []
    smart_features: Optional[dict] = None
    upload_date: str

# --- AI PROCESSING TASKS ---
def process_image_task(photo_id: str, file_path: str):
    """
    Background task that runs the AI pipeline:
    1. Generate Caption (BLIP)
    2. Analyze Faces (InsightFace)
    3. Create Embeddings (CLIP)
    4. Update DBs
    """
    logger.info(f"Starting AI analysis for {photo_id}")

    try:
        # Load Images
        pil_image = Image.open(file_path).convert("RGB")
        cv_image = cv2.imread(file_path)  # InsightFace needs OpenCV (BGR) format

        # A. Captioning (BLIP)
        inputs = ai_models["blip_processor"](pil_image, return_tensors="pt").to(device)
        out = ai_models["blip_model"].generate(**inputs)
        caption = ai_models["blip_processor"].decode(out[0], skip_special_tokens=True)

        # B. Smart Feature: Face Analysis (InsightFace)
        faces = ai_models["face"].get(cv_image)
        face_data = []
        tags = ["ai-generated"]

        if len(faces) > 0:
            avg_age = np.mean([face.age for face in faces])
            gender_counts = {"M": 0, "F": 0}
            for face in faces:
                gender = face.sex  # InsightFace exposes .sex as the string 'M' or 'F'
                gender_counts[gender] += 1
                face_data.append({
                    "age": int(face.age),
                    "gender": gender,
                    "confidence": float(face.det_score)
                })

            # Smart Tagging based on Analysis
            tags.append("person")
            tags.append(f"{len(faces)} people")
            if gender_counts["M"] > gender_counts["F"]:
                tags.append("mostly_male")
            if gender_counts["F"] > gender_counts["M"]:
                tags.append("mostly_female")
            if avg_age < 18:
                tags.append("youth")
            elif avg_age > 60:
                tags.append("senior")
            else:
                tags.append("adult")
        else:
            tags.append("scenery")  # Fallback tag
            face_data = {"message": "No faces detected"}

        # Combine caption words into tags (simple approach)
        tags.extend([word for word in caption.split() if len(word) > 4])
        tags = list(set(tags))  # de-duplicate

        # C. Embedding (CLIP)
        # We embed the IMAGE itself for semantic search
        embedding = ai_models["clip"].encode(pil_image).tolist()

        # D. Save Results
        conn = sqlite3.connect(DB_PATH)
        cursor = conn.cursor()
        cursor.execute("""
            UPDATE photos
            SET caption = ?, tags = ?, smart_analysis = ?, status = 'completed'
            WHERE id = ?
        """, (caption, json.dumps(tags), json.dumps(face_data), photo_id))
        conn.commit()
        conn.close()

        # Save to ChromaDB
        collection.add(
            ids=[photo_id],
            embeddings=[embedding],
            metadatas=[{"caption": caption}]
        )

        logger.info(f"AI processing completed for {photo_id}")

    except Exception as e:
        logger.error(f"Error processing {photo_id}: {e}")
        conn = sqlite3.connect(DB_PATH)
        conn.execute("UPDATE photos SET status = 'failed' WHERE id = ?", (photo_id,))
        conn.commit()
        conn.close()

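# Note: process_image_task is deliberately synchronous; Starlette runs non-async
# background tasks in a worker thread, so the model inference above does not
# block the event loop.
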
# --- API ENDPOINTS ---

@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the UI"""
    return templates.TemplateResponse("index.html", {"request": request})

@app.post("/upload", response_model=PhotoResponse)
async def upload_photo(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """
    1. Validate file
    2. Save to disk
    3. Create DB record
    4. Trigger async AI task
    """
    if not file.content_type or not file.content_type.startswith("image/"):
        raise HTTPException(status_code=400, detail="File must be an image")

    file_id = str(uuid.uuid4())
    ext = file.filename.split(".")[-1]
    filename = f"{file_id}.{ext}"
    file_path = os.path.join(UPLOAD_DIR, filename)

    # Save file
    with open(file_path, "wb") as buffer:
        shutil.copyfileobj(file.file, buffer)

    # Initial DB Record
    conn = sqlite3.connect(DB_PATH)
    cursor = conn.cursor()
    cursor.execute("""
        INSERT INTO photos (id, filename, filepath, upload_date, status)
        VALUES (?, ?, ?, ?, 'processing')
    """, (file_id, file.filename, file_path, datetime.now().isoformat()))
    conn.commit()
    conn.close()

    # Trigger AI in the background
    background_tasks.add_task(process_image_task, file_id, file_path)

    return {
        "id": file_id,
        "filename": file.filename,
        "url": f"/static/uploads/{filename}",
        "upload_date": datetime.now().isoformat()
    }

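# Clients can poll GET /photo/{photo_id} to watch progress: caption stays null
# and tags stay empty until the background task finishes.
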
@app.get("/photo/{photo_id}", response_model=PhotoResponse)
async def get_photo(photo_id: str):
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()
    row = cursor.execute("SELECT * FROM photos WHERE id = ?", (photo_id,)).fetchone()
    conn.close()

    if not row:
        raise HTTPException(status_code=404, detail="Photo not found")

    return {
        "id": row["id"],
        "filename": row["filename"],
        "url": f"/{row['filepath']}",
        "caption": row["caption"],
        "tags": json.loads(row["tags"]) if row["tags"] else [],
        "smart_features": json.loads(row["smart_analysis"]) if row["smart_analysis"] else None,
        "upload_date": row["upload_date"]
    }

@app.get("/search")
async def search_photos(q: str):
    """
    Semantic Search:
    1. Embed query text using CLIP.
    2. Search ChromaDB for nearest image vectors.
    3. Retrieve metadata from SQLite.
    """
    # Embed query text
    query_vec = ai_models["clip"].encode(q).tolist()

    # Query Vector DB
    results = collection.query(
        query_embeddings=[query_vec],
        n_results=5
    )

    ids = results["ids"][0]
    if not ids:
        return []

    # Fetch details from SQLite
    placeholders = ",".join("?" * len(ids))
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    cursor = conn.cursor()

    # SQL IN (...) does not preserve ChromaDB's ranking, so re-order rows to match
    rows = cursor.execute(f"SELECT * FROM photos WHERE id IN ({placeholders})", ids).fetchall()
    conn.close()
    rows_by_id = {row["id"]: row for row in rows}
    rows = [rows_by_id[i] for i in ids if i in rows_by_id]

    # Format response
    response_data = []
    for row in rows:
        response_data.append({
            "id": row["id"],
            "url": f"/{row['filepath']}",
            "caption": row["caption"],
            "tags": json.loads(row["tags"]) if row["tags"] else [],
            "smart_features": json.loads(row["smart_analysis"]) if row["smart_analysis"] else None,
        })

    return response_data

if __name__ == '__main__':
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)
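
# A minimal end-to-end client sketch (assumptions: the server is running locally
# on port 8000 and the `requests` package is installed; it is not listed in
# requirements.txt):
#
#   import requests
#   with open("photo.jpg", "rb") as f:
#       r = requests.post("http://localhost:8000/upload",
#                         files={"file": ("photo.jpg", f, "image/jpeg")})
#   photo_id = r.json()["id"]
#   print(requests.get(f"http://localhost:8000/photo/{photo_id}").json())
#   print(requests.get("http://localhost:8000/search",
#                      params={"q": "a dog in grass"}).json())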
requirements.txt ADDED
@@ -0,0 +1,13 @@
fastapi
uvicorn
python-multipart
chromadb
sentence-transformers
transformers
torch
pillow
insightface
onnxruntime
opencv-python
jinja2
numpy
templates/index.html ADDED
@@ -0,0 +1,92 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Cloudzy AI Photo Manager</title>
    <style>
        body { font-family: sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }
        .upload-box { border: 2px dashed #ccc; padding: 20px; text-align: center; margin-bottom: 20px; }
        .gallery { display: grid; grid-template-columns: repeat(auto-fill, minmax(200px, 1fr)); gap: 20px; }
        .photo-card { border: 1px solid #eee; padding: 10px; border-radius: 8px; }
        .photo-card img { width: 100%; height: 150px; object-fit: cover; border-radius: 4px; }
        .tags { font-size: 0.8em; color: #666; }
        .meta { font-size: 0.7em; color: #888; margin-top: 5px; }
    </style>
</head>
<body>
    <h1>Cloudzy AI Challenge</h1>

    <div class="upload-box">
        <h3>Upload Photo</h3>
        <input type="file" id="fileInput">
        <button onclick="uploadPhoto()">Upload</button>
        <p id="uploadStatus"></p>
    </div>

    <div style="margin-bottom: 20px;">
        <input type="text" id="searchInput" placeholder="Search (e.g., 'dog in grass' or 'happy person')..." style="width: 70%;">
        <button onclick="searchPhotos()">Semantic Search</button>
    </div>

    <div id="gallery" class="gallery"></div>

    <script>
        async function uploadPhoto() {
            const fileInput = document.getElementById('fileInput');
            const status = document.getElementById('uploadStatus');

            if (!fileInput.files[0]) return alert("Select a file!");

            const formData = new FormData();
            formData.append('file', fileInput.files[0]);

            status.innerText = "Uploading...";

            try {
                const res = await fetch('/upload', { method: 'POST', body: formData });
                const data = await res.json();
                status.innerText = "Upload successful! ID: " + data.id + ". Processing AI...";
                setTimeout(searchPhotos, 2000); // Re-run the current search (no-op if the search box is empty)
            } catch (e) {
                status.innerText = "Error uploading.";
            }
        }

        async function searchPhotos() {
            const query = document.getElementById('searchInput').value;
            const gallery = document.getElementById('gallery');

            // The backend has no "list all" endpoint, so an empty query just clears the view
            if (!query) { gallery.innerHTML = ""; return; }

            gallery.innerHTML = "Loading...";

            const res = await fetch(`/search?q=${encodeURIComponent(query)}`);
            const photos = await res.json();

            gallery.innerHTML = "";
            photos.forEach(photo => {
                const div = document.createElement('div');
                div.className = 'photo-card';

                // Parse smart features for display
                let faceInfo = "";
                if (photo.smart_features && Array.isArray(photo.smart_features)) {
                    faceInfo = `${photo.smart_features.length} Face(s) detected`;
                } else if (photo.smart_features && photo.smart_features.message) {
                    faceInfo = photo.smart_features.message;
                }

                div.innerHTML = `
                    <img src="${photo.url}" alt="photo">
                    <p><strong>${photo.caption || "Processing..."}</strong></p>
                    <div class="tags">${photo.tags.slice(0, 5).join(", ")}</div>
                    <div class="meta">${faceInfo}</div>
                `;
                gallery.appendChild(div);
            });
        }
    </script>
</body>
</html>