Upload 7 files
Browse files- Dockerfile +35 -16
- app.py +64 -57
- best.pt +3 -0
- image_processing.py +44 -0
- models.py +27 -0
- requirements.txt +18 -5
- utils.py +35 -0
Dockerfile
CHANGED
|
@@ -1,16 +1,35 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Use an official Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies required for OpenCV
# (libGL and glib are needed by opencv-python's compiled extensions)
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1-mesa-glx \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy the requirements.txt into the container at /app
# (copied before the rest of the source so the dependency layer stays cached)
COPY requirements.txt /app/

# Install any needed Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Create writable directories for caching and configuration
# (world-writable so the image also works when run as a non-root user,
# e.g. on Hugging Face Spaces)
RUN mkdir -p /app/cache /app/config && chmod -R 777 /app/cache /app/config

# Redirect Hugging Face / matplotlib / ultralytics caches into the
# writable directories created above
ENV HF_HOME=/app/cache \
    MPLCONFIGDIR=/app/config/matplotlib \
    YOLO_CONFIG_DIR=/app/config/ultralytics

# NOTE(review): this variable name does not appear in the huggingface_hub
# environment-variable docs — confirm it is honored (HF_HUB_DISABLE_TELEMETRY
# and friends are the documented switches)
ENV HUGGINGFACE_HUB_ENABLE_HF_UPGRADE_CHECK=false
# Copy the current directory contents into the container at /app
COPY . /app/

# Expose port 8000 for the FastAPI app
EXPOSE 8000

# Run FastAPI app using uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app.py
CHANGED
|
@@ -1,57 +1,64 @@
|
|
| 1 |
-
import
|
| 2 |
-
from fastapi import
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
from
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
<
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse
import base64

from models import load_models
from image_processing import process_image_with_models
from utils import validate_image, save_temp_image

# Initialize FastAPI
app = FastAPI()

# Load models once at import time so every request reuses the same instances.
models = load_models(device='cpu')  # Use 'cuda' if GPU is available
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve a minimal HTML landing page linking to the Swagger UI at /docs."""
    return """
    <html>
        <body>
            <h1>Image Processing API</h1>
            <p>Visit <a href="/docs">/docs</a> to see the API documentation.</p>
        </body>
    </html>
    """
@app.post("/process/")
async def process_image(
    file: UploadFile = File(...),
    box_threshold: float = 0.05,
    iou_threshold: float = 0.1
):
    """Run detection + captioning on an uploaded image.

    Args:
        file: Uploaded PNG or JPEG image.
        box_threshold: Detection-confidence threshold forwarded to processing.
        iou_threshold: IoU threshold forwarded to processing.

    Returns:
        JSON dict with the base64-encoded labeled image, the parsed content
        joined into one newline-separated string, and the box coordinates.

    Raises:
        HTTPException: 400 for an invalid upload, 500 for processing errors.
    """
    try:
        # Validate file type
        if file.content_type not in ["image/png", "image/jpeg", "image/jpg"]:
            raise HTTPException(
                status_code=400,
                detail="Invalid file type. Please upload a PNG or JPEG image."
            )

        # Read and validate image
        content = await file.read()
        image = validate_image(content)

        # Process image
        labeled_img, coordinates, parsed_content = process_image_with_models(
            image,
            models,
            box_threshold,
            iou_threshold
        )

        return {
            # labeled_img is already base64-encoded bytes (see
            # image_processing.process_image_with_models), so only decode it —
            # wrapping it in base64.b64encode() again double-encoded the payload.
            "labeled_image": labeled_img.decode("utf-8"),
            "parsed_content": "\n".join(parsed_content),
            "coordinates": coordinates
        }

    except HTTPException:
        # Re-raise our own 400s unchanged; without this clause the generic
        # Exception handler below converted them into 500 responses.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing the image: {str(e)}"
        )
best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:118fa31760b4f4dfad2f236f577032d74d18cc2f382f888015153891f5f6d8e4
|
| 3 |
+
size 12222450
|
image_processing.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import base64
|
| 4 |
+
import io
|
| 5 |
+
|
| 6 |
+
def process_image_with_models(
    image: Image.Image,
    models: dict,
    box_threshold: float = 0.05,
    iou_threshold: float = 0.1
) -> tuple:
    """Process image with YOLO and captioning models.

    Args:
        image: Input PIL image.
        models: Dict with 'yolo_model', 'processor' and 'caption_model'
            as produced by models.load_models().
        box_threshold: Minimum detection confidence for YOLO boxes.
        iou_threshold: IoU threshold for YOLO non-max suppression.

    Returns:
        Tuple of (base64-encoded PNG bytes of the annotated image,
        list of [x1, y1, x2, y2] box coordinates, list of caption strings).
    """
    # Convert PIL Image to numpy array
    img_array = np.array(image)

    # Run YOLO detection. The thresholds were previously accepted but never
    # forwarded, so callers could not influence detection at all.
    results = models['yolo_model'](
        img_array,
        conf=box_threshold,
        iou=iou_threshold
    )

    # Get bounding boxes as plain Python lists
    boxes = results[0].boxes
    coordinates = boxes.xyxy.cpu().numpy().tolist()

    # Process with caption model
    inputs = models['processor'](images=image, return_tensors="pt")
    outputs = models['caption_model'].generate(
        **inputs,
        max_length=50,
        num_beams=5,
        early_stopping=True
    )

    # Decode captions
    captions = models['processor'].batch_decode(outputs, skip_special_tokens=True)

    # Create labeled image (annotated numpy array from ultralytics)
    img_with_boxes = results[0].plot()

    # Convert numpy array to PIL Image and then to base64 bytes
    labeled_img = Image.fromarray(img_with_boxes)
    buffered = io.BytesIO()
    labeled_img.save(buffered, format="PNG")
    img_str = base64.b64encode(buffered.getvalue())

    return img_str, coordinates, captions
models.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import AutoProcessor, AutoModelForCausalLM
|
| 2 |
+
import torch
|
| 3 |
+
from ultralytics import YOLO
|
| 4 |
+
|
| 5 |
+
def load_models(device='cpu'):
    """Initialize and load all required models.

    Args:
        device: Torch device string ('cpu' or 'cuda').

    Returns:
        Dict with keys 'yolo_model', 'processor' and 'caption_model'.
    """
    # Keep everything in float32 so CPU inference works (half precision is
    # not generally supported on CPU).
    torch.set_default_dtype(torch.float32)

    # Fine-tuned icon-detection weights shipped alongside this app.
    yolo_model = YOLO('best.pt').to(device)

    processor = AutoProcessor.from_pretrained(
        "microsoft/Florence-2-base",
        trust_remote_code=True
    )

    # Hub repo ids may contain at most one '/'; the caption weights live in
    # the icon_caption_florence folder of the microsoft/OmniParser repo, so
    # they must be addressed via `subfolder=` — the previous three-segment
    # id "microsoft/OmniParser/icon_caption_florence" is rejected as an
    # invalid repo id.
    caption_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/OmniParser",
        subfolder="icon_caption_florence",
        torch_dtype=torch.float32,  # Changed from float16 to float32
        trust_remote_code=True
    ).to(device)

    return {
        'yolo_model': yolo_model,
        'processor': processor,
        'caption_model': caption_model
    }
requirements.txt
CHANGED
|
@@ -1,5 +1,18 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.95.1
uvicorn==0.23.0
transformers==4.30.0
torch==2.1.0
Pillow==9.5.0
ultralytics>=8.0.0
numpy>=1.24.0
opencv-python>=4.5.5
click>=8.0.4
typing-extensions>=4.0.0
pydantic>=1.10.0
py7zr
dill
|
utils.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import io
|
| 4 |
+
|
| 5 |
+
def validate_image(file_content: bytes) -> Image.Image:
    """Decode raw upload bytes into an RGB PIL image.

    Args:
        file_content: Raw bytes of the uploaded file.

    Returns:
        A decoded PIL image, converted to RGB if needed.

    Raises:
        ValueError: If the bytes cannot be decoded as an image.
    """
    try:
        # First pass: let Pillow confirm the payload really is an image.
        Image.open(io.BytesIO(file_content)).verify()

        # verify() leaves the file object unusable, so decode a fresh copy
        # for actual processing.
        picture = Image.open(io.BytesIO(file_content))

        # Normalize alpha-channel / palette / grayscale modes down to RGB.
        if picture.mode in ('RGBA', 'LA') or (picture.mode != 'RGB'):
            picture = picture.convert('RGB')

        return picture
    except Exception as err:
        # Surface decoding problems as a single, catchable error type.
        raise ValueError(f"Invalid image file: {str(err)}")
|
| 23 |
+
|
| 24 |
+
def save_temp_image(image: Image.Image, path: str) -> str:
    """Save image to temporary location.

    Args:
        image: Image to write (anything exposing a PIL-style save(path)).
        path: Destination file path; missing parent directories are created.

    Returns:
        The path the image was written to.

    Raises:
        RuntimeError: If directory creation or the save itself fails.
    """
    try:
        # Ensure the parent directory exists. Guard against bare filenames:
        # os.path.dirname() returns '' for them, and os.makedirs('') raises
        # FileNotFoundError, which made saving to the cwd impossible.
        parent = os.path.dirname(path)
        if parent:
            os.makedirs(parent, exist_ok=True)

        # Save the image to the given path
        image.save(path)
        return path
    except Exception as e:
        # Handle file saving errors
        raise RuntimeError(f"Error saving image to {path}: {str(e)}")
|