Sanket17 commited on
Commit
1f282bf
·
verified ·
1 Parent(s): 21fe931

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +35 -16
  2. app.py +64 -57
  3. best.pt +3 -0
  4. image_processing.py +44 -0
  5. models.py +27 -0
  6. requirements.txt +18 -5
  7. utils.py +35 -0
Dockerfile CHANGED
@@ -1,16 +1,35 @@
1
- # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
- # you will also find guides on how best to write your Dockerfile
3
-
4
- FROM python:3.9
5
-
6
- RUN useradd -m -u 1000 user
7
- USER user
8
- ENV PATH="/home/user/.local/bin:$PATH"
9
-
10
- WORKDIR /app
11
-
12
- COPY --chown=user ./requirements.txt requirements.txt
13
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
-
15
- COPY --chown=user . /app
16
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official slim Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies required for OpenCV.
# NOTE: libgl1 replaces the transitional libgl1-mesa-glx package, which was
# dropped from Debian bookworm (the base of current python:*-slim images)
# and makes `apt-get install` fail there.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy only requirements.txt first so the dependency layer is cached
# independently of source-code changes.
COPY requirements.txt /app/

# Install any needed Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Create writable directories for caching and configuration
# (the runtime user may not be root, e.g. on Hugging Face Spaces).
RUN mkdir -p /app/cache /app/config && chmod -R 777 /app/cache /app/config

# Point framework caches/config at the writable directories.
ENV HF_HOME=/app/cache \
    MPLCONFIGDIR=/app/config/matplotlib \
    YOLO_CONFIG_DIR=/app/config/ultralytics

ENV HUGGINGFACE_HUB_ENABLE_HF_UPGRADE_CHECK=false

# Copy the current directory contents into the container at /app
COPY . /app/

# Expose port 8000 for the FastAPI app.
# NOTE(review): Hugging Face Spaces routes to port 7860 by default; serving
# on 8000 requires `app_port: 8000` in the Space README front matter — verify
# the Space configuration.
EXPOSE 8000

# Run FastAPI app using uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app.py CHANGED
@@ -1,57 +1,64 @@
1
- import requests
2
- from fastapi import FastAPI, HTTPException
3
- from safetensors.torch import load_file
4
- import io
5
- from fastapi.responses import HTMLResponse
6
-
7
- # Create the FastAPI app
8
- app = FastAPI()
9
-
10
- # Home route serving static "Hello, World!" HTML content
11
- @app.get("/", response_class=HTMLResponse)
12
- async def home():
13
- html_content = """
14
- <!DOCTYPE html>
15
- <html lang="en">
16
- <head>
17
- <meta charset="UTF-8">
18
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
19
- <title>Hello World</title>
20
- </head>
21
- <body>
22
- <h1>Hello, World!</h1>
23
- <p>This is a simple FastAPI app serving an HTML page.</p>
24
- </body>
25
- </html>
26
- """
27
- return HTMLResponse(content=html_content)
28
-
29
- # Define the Google Drive file ID (replace with your actual file ID)
30
- file_id = "1hUCqZ3X8mcM-KcwWFjcsFg7PA0hUvE3k" # Replace this with your Google Drive file ID
31
-
32
- # Function to stream the model file from Google Drive
33
- def stream_model_from_gdrive(file_id):
34
- url = f"https://drive.google.com/uc?id={file_id}&export=download"
35
- response = requests.get(url, stream=True)
36
-
37
- if response.status_code == 200:
38
- return io.BytesIO(response.content) # Return file content as a BytesIO object
39
- else:
40
- raise HTTPException(status_code=404, detail="Model not found")
41
-
42
- # Endpoint to load and use the model
43
- @app.get("/load-model")
44
- async def load_model():
45
- try:
46
- # Stream model data from Google Drive
47
- model_data = stream_model_from_gdrive(file_id)
48
-
49
- # Load the model from the streamed data using safetensors
50
- model = load_file(model_data)
51
-
52
- # Return a simple confirmation that the model is loaded
53
- return {"message": "Model loaded successfully!"}
54
-
55
- except Exception as e:
56
- raise HTTPException(status_code=500, detail=str(e))
57
-
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse
import base64

from models import load_models
from image_processing import process_image_with_models
# NOTE(review): save_temp_image is imported but never used in this module —
# confirm no external code relies on re-exporting it before removing.
from utils import validate_image, save_temp_image

# Initialize FastAPI
app = FastAPI()

# Load models once at import time so every request reuses the same instances.
# NOTE(review): this blocks server startup until all weights are loaded —
# confirm that is acceptable for the deployment target.
models = load_models(device='cpu')  # Use 'cuda' if GPU is available
15
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve a minimal landing page linking to the interactive API docs."""
    landing_page = """
    <html>
        <body>
            <h1>Image Processing API</h1>
            <p>Visit <a href="/docs">/docs</a> to see the API documentation.</p>
        </body>
    </html>
    """
    return landing_page
25
+
26
@app.post("/process/")
async def process_image(
    file: UploadFile = File(...),
    box_threshold: float = 0.05,
    iou_threshold: float = 0.1
):
    """Detect UI elements in an uploaded image and caption them.

    Accepts a PNG/JPEG upload, runs the detection + captioning pipeline,
    and returns the annotated image (base64 PNG), the parsed captions,
    and the bounding-box coordinates.

    Raises:
        HTTPException 400: unsupported content type or unreadable image.
        HTTPException 500: unexpected failure inside the pipeline.
    """
    # Validate the content type OUTSIDE the try block: previously this 400
    # was raised inside it and swallowed by `except Exception`, so clients
    # received a misleading 500 instead.
    if file.content_type not in ["image/png", "image/jpeg", "image/jpg"]:
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload a PNG or JPEG image."
        )

    try:
        # Read and validate image bytes (validate_image raises ValueError
        # on corrupt input).
        content = await file.read()
        image = validate_image(content)

        # Process image through the detection + captioning models.
        labeled_img, coordinates, parsed_content = process_image_with_models(
            image,
            models,
            box_threshold,
            iou_threshold
        )

        return {
            "labeled_image": base64.b64encode(labeled_img).decode("utf-8"),
            "parsed_content": "\n".join(parsed_content),
            "coordinates": coordinates
        }

    except ValueError as e:
        # Corrupt/unreadable image content is a client error.
        raise HTTPException(status_code=400, detail=str(e))
    except HTTPException:
        # Re-raise explicit HTTP errors untouched instead of masking them
        # as 500s below.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing the image: {str(e)}"
        )
best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118fa31760b4f4dfad2f236f577032d74d18cc2f382f888015153891f5f6d8e4
3
+ size 12222450
image_processing.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from PIL import Image
3
+ import base64
4
+ import io
5
+
6
def process_image_with_models(
    image: "Image.Image",
    models: dict,
    box_threshold: float = 0.05,
    iou_threshold: float = 0.1
) -> tuple:
    """Run YOLO detection and the caption model on *image*.

    Args:
        image: input PIL image.
        models: dict with 'yolo_model', 'processor' and 'caption_model'
            (as returned by models.load_models).
        box_threshold: detector confidence threshold.
        iou_threshold: detector NMS IoU threshold.

    Returns:
        (png_bytes, coordinates, captions) where png_bytes is the RAW PNG
        encoding of the annotated image — the caller (app.py) base64-encodes
        it exactly once.
    """
    # Convert PIL Image to numpy array for the detector.
    img_array = np.array(image)

    # Forward the thresholds to YOLO. Previously they were accepted by this
    # function but never passed on, so the API parameters had no effect.
    results = models['yolo_model'](
        img_array, conf=box_threshold, iou=iou_threshold
    )

    # Collect bounding boxes as plain nested lists (JSON-serializable).
    boxes = results[0].boxes
    coordinates = boxes.xyxy.cpu().numpy().tolist()

    # Caption the image with the Florence model.
    inputs = models['processor'](images=image, return_tensors="pt")
    outputs = models['caption_model'].generate(
        **inputs,
        max_length=50,
        num_beams=5,
        early_stopping=True
    )
    captions = models['processor'].batch_decode(outputs, skip_special_tokens=True)

    # Render the detections onto the image and encode as PNG.
    img_with_boxes = results[0].plot()
    labeled_img = Image.fromarray(img_with_boxes)
    buffered = io.BytesIO()
    labeled_img.save(buffered, format="PNG")

    # Return RAW PNG bytes. The previous version base64-encoded here while
    # app.py base64-encoded the value again, producing a double-encoded
    # (unrenderable) image.
    return buffered.getvalue(), coordinates, captions
models.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoProcessor, AutoModelForCausalLM
2
+ import torch
3
+ from ultralytics import YOLO
4
+
5
def load_models(device='cpu'):
    """Initialize and load all required models.

    Args:
        device: torch device string ('cpu' or 'cuda') to place models on.

    Returns:
        dict with keys 'yolo_model', 'processor', 'caption_model'.
    """
    # Force float32 everywhere: float16 is poorly supported on CPU.
    torch.set_default_dtype(torch.float32)

    # Local detector weights shipped with the app (best.pt, git-lfs).
    yolo_model = YOLO('best.pt').to(device)

    processor = AutoProcessor.from_pretrained(
        "microsoft/Florence-2-base",
        trust_remote_code=True
    )

    # "microsoft/OmniParser/icon_caption_florence" is not a valid Hub repo id
    # (repo ids allow at most one "/"), so from_pretrained rejects it. The
    # caption weights live in the icon_caption_florence SUBFOLDER of the
    # microsoft/OmniParser repo.
    caption_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/OmniParser",
        subfolder="icon_caption_florence",
        torch_dtype=torch.float32,  # keep float32 for CPU inference
        trust_remote_code=True
    ).to(device)

    return {
        'yolo_model': yolo_model,
        'processor': processor,
        'caption_model': caption_model
    }
requirements.txt CHANGED
@@ -1,5 +1,18 @@
1
- safetensors
2
- torch
3
- requests
4
- fastapi
5
- uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
fastapi==0.95.1
uvicorn==0.23.0
transformers==4.30.0
torch==2.1.0
Pillow==9.5.0
ultralytics>=8.0.0
numpy>=1.24.0
opencv-python>=4.5.5
click>=8.0.4
typing-extensions>=4.0.0
pydantic>=1.10.0
py7zr
dill
utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image
3
+ import io
4
+
5
def validate_image(file_content: bytes) -> "Image.Image":
    """Validate raw upload bytes and return a decoded RGB PIL image.

    Args:
        file_content: the raw bytes of the uploaded file.

    Returns:
        A PIL Image in RGB mode.

    Raises:
        ValueError: if the bytes are not a readable image.
    """
    try:
        # verify() checks integrity but leaves the file object unusable,
        # so the image must be reopened afterwards for real processing.
        image = Image.open(io.BytesIO(file_content))
        image.verify()
        image = Image.open(io.BytesIO(file_content))

        # Normalize every non-RGB mode (RGBA, LA, P, L, CMYK, ...) to RGB.
        # The original test `mode in ('RGBA', 'LA') or mode != 'RGB'` was
        # redundant — the second clause already implies the first.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        return image
    except Exception as e:
        # Surface any decode/verify failure as a single client-facing error.
        raise ValueError(f"Invalid image file: {str(e)}")
23
+
24
def save_temp_image(image: "Image.Image", path: str) -> str:
    """Save *image* to *path*, creating parent directories as needed.

    Args:
        image: a PIL Image (anything exposing .save(path)).
        path: destination file path; may be a bare filename.

    Returns:
        The path the image was saved to.

    Raises:
        RuntimeError: if directory creation or saving fails.
    """
    try:
        # Only create directories when the path actually has a directory
        # component: os.makedirs('') raises FileNotFoundError, which the
        # original code turned into a spurious RuntimeError for bare
        # filenames like "out.png".
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)

        image.save(path)
        return path
    except Exception as e:
        # Wrap any OS/PIL failure with the offending path for context.
        raise RuntimeError(f"Error saving image to {path}: {str(e)}")