Sanket17 commited on
Commit
1f282bf
·
verified ·
1 Parent(s): 21fe931

Upload 7 files

Browse files
Files changed (7) hide show
  1. Dockerfile +35 -16
  2. app.py +64 -57
  3. best.pt +3 -0
  4. image_processing.py +44 -0
  5. models.py +27 -0
  6. requirements.txt +18 -5
  7. utils.py +35 -0
Dockerfile CHANGED
@@ -1,16 +1,35 @@
1
- # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
2
- # you will also find guides on how best to write your Dockerfile
3
-
4
- FROM python:3.9
5
-
6
- RUN useradd -m -u 1000 user
7
- USER user
8
- ENV PATH="/home/user/.local/bin:$PATH"
9
-
10
- WORKDIR /app
11
-
12
- COPY --chown=user ./requirements.txt requirements.txt
13
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
-
15
- COPY --chown=user . /app
16
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use an official slim Python runtime as a parent image
FROM python:3.9-slim

# Set the working directory in the container
WORKDIR /app

# Install system dependencies required for OpenCV.
# NOTE: libgl1 replaces the transitional libgl1-mesa-glx package, which was
# dropped from Debian bookworm (the base of current python:*-slim images)
# and makes `apt-get install` fail there.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy only requirements.txt first so the dependency layer is cached
# independently of source-code changes.
COPY requirements.txt /app/

# Install any needed Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# Create writable directories for caching and configuration
# (the runtime user may not be root, e.g. on Hugging Face Spaces).
RUN mkdir -p /app/cache /app/config && chmod -R 777 /app/cache /app/config

# Point framework caches/config at the writable directories.
ENV HF_HOME=/app/cache \
    MPLCONFIGDIR=/app/config/matplotlib \
    YOLO_CONFIG_DIR=/app/config/ultralytics

ENV HUGGINGFACE_HUB_ENABLE_HF_UPGRADE_CHECK=false

# Copy the current directory contents into the container at /app
COPY . /app/

# Expose port 8000 for the FastAPI app.
# NOTE(review): Hugging Face Spaces routes to port 7860 by default; serving
# on 8000 requires `app_port: 8000` in the Space README front matter — verify
# the Space configuration.
EXPOSE 8000

# Run FastAPI app using uvicorn
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
app.py CHANGED
@@ -1,57 +1,64 @@
1
- import requests
2
- from fastapi import FastAPI, HTTPException
3
- from safetensors.torch import load_file
4
- import io
5
- from fastapi.responses import HTMLResponse
6
-
7
- # Create the FastAPI app
8
- app = FastAPI()
9
-
10
- # Home route serving static "Hello, World!" HTML content
11
- @app.get("/", response_class=HTMLResponse)
12
- async def home():
13
- html_content = """
14
- <!DOCTYPE html>
15
- <html lang="en">
16
- <head>
17
- <meta charset="UTF-8">
18
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
19
- <title>Hello World</title>
20
- </head>
21
- <body>
22
- <h1>Hello, World!</h1>
23
- <p>This is a simple FastAPI app serving an HTML page.</p>
24
- </body>
25
- </html>
26
- """
27
- return HTMLResponse(content=html_content)
28
-
29
- # Define the Google Drive file ID (replace with your actual file ID)
30
- file_id = "1hUCqZ3X8mcM-KcwWFjcsFg7PA0hUvE3k" # Replace this with your Google Drive file ID
31
-
32
- # Function to stream the model file from Google Drive
33
- def stream_model_from_gdrive(file_id):
34
- url = f"https://drive.google.com/uc?id={file_id}&export=download"
35
- response = requests.get(url, stream=True)
36
-
37
- if response.status_code == 200:
38
- return io.BytesIO(response.content) # Return file content as a BytesIO object
39
- else:
40
- raise HTTPException(status_code=404, detail="Model not found")
41
-
42
- # Endpoint to load and use the model
43
- @app.get("/load-model")
44
- async def load_model():
45
- try:
46
- # Stream model data from Google Drive
47
- model_data = stream_model_from_gdrive(file_id)
48
-
49
- # Load the model from the streamed data using safetensors
50
- model = load_file(model_data)
51
-
52
- # Return a simple confirmation that the model is loaded
53
- return {"message": "Model loaded successfully!"}
54
-
55
- except Exception as e:
56
- raise HTTPException(status_code=500, detail=str(e))
57
-
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.responses import HTMLResponse
import base64

from models import load_models
from image_processing import process_image_with_models
# NOTE(review): save_temp_image is imported but never used in this module —
# confirm no external code relies on re-exporting it before removing.
from utils import validate_image, save_temp_image

# Initialize FastAPI
app = FastAPI()

# Load models once at import time so every request reuses the same instances.
# NOTE(review): this blocks server startup until all weights are loaded —
# confirm that is acceptable for the deployment target.
models = load_models(device='cpu')  # Use 'cuda' if GPU is available
15
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Serve a minimal landing page linking to the interactive API docs."""
    landing_page = """
    <html>
        <body>
            <h1>Image Processing API</h1>
            <p>Visit <a href="/docs">/docs</a> to see the API documentation.</p>
        </body>
    </html>
    """
    return landing_page
25
+
26
@app.post("/process/")
async def process_image(
    file: UploadFile = File(...),
    box_threshold: float = 0.05,
    iou_threshold: float = 0.1
):
    """Detect UI elements in an uploaded image and caption them.

    Accepts a PNG/JPEG upload, runs the detection + captioning pipeline,
    and returns the annotated image (base64 PNG), the parsed captions,
    and the bounding-box coordinates.

    Raises:
        HTTPException 400: unsupported content type or unreadable image.
        HTTPException 500: unexpected failure inside the pipeline.
    """
    # Validate the content type OUTSIDE the try block: previously this 400
    # was raised inside it and swallowed by `except Exception`, so clients
    # received a misleading 500 instead.
    if file.content_type not in ["image/png", "image/jpeg", "image/jpg"]:
        raise HTTPException(
            status_code=400,
            detail="Invalid file type. Please upload a PNG or JPEG image."
        )

    try:
        # Read and validate image bytes (validate_image raises ValueError
        # on corrupt input).
        content = await file.read()
        image = validate_image(content)

        # Process image through the detection + captioning models.
        labeled_img, coordinates, parsed_content = process_image_with_models(
            image,
            models,
            box_threshold,
            iou_threshold
        )

        return {
            "labeled_image": base64.b64encode(labeled_img).decode("utf-8"),
            "parsed_content": "\n".join(parsed_content),
            "coordinates": coordinates
        }

    except ValueError as e:
        # Corrupt/unreadable image content is a client error.
        raise HTTPException(status_code=400, detail=str(e))
    except HTTPException:
        # Re-raise explicit HTTP errors untouched instead of masking them
        # as 500s below.
        raise
    except Exception as e:
        raise HTTPException(
            status_code=500,
            detail=f"An error occurred while processing the image: {str(e)}"
        )
best.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118fa31760b4f4dfad2f236f577032d74d18cc2f382f888015153891f5f6d8e4
3
+ size 12222450
image_processing.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from PIL import Image
3
+ import base64
4
+ import io
5
+
6
def process_image_with_models(
    image: "Image.Image",
    models: dict,
    box_threshold: float = 0.05,
    iou_threshold: float = 0.1
) -> tuple:
    """Run YOLO detection and the caption model on *image*.

    Args:
        image: input PIL image.
        models: dict with 'yolo_model', 'processor' and 'caption_model'
            (as returned by models.load_models).
        box_threshold: detector confidence threshold.
        iou_threshold: detector NMS IoU threshold.

    Returns:
        (png_bytes, coordinates, captions) where png_bytes is the RAW PNG
        encoding of the annotated image — the caller (app.py) base64-encodes
        it exactly once.
    """
    # Convert PIL Image to numpy array for the detector.
    img_array = np.array(image)

    # Forward the thresholds to YOLO. Previously they were accepted by this
    # function but never passed on, so the API parameters had no effect.
    results = models['yolo_model'](
        img_array, conf=box_threshold, iou=iou_threshold
    )

    # Collect bounding boxes as plain nested lists (JSON-serializable).
    boxes = results[0].boxes
    coordinates = boxes.xyxy.cpu().numpy().tolist()

    # Caption the image with the Florence model.
    inputs = models['processor'](images=image, return_tensors="pt")
    outputs = models['caption_model'].generate(
        **inputs,
        max_length=50,
        num_beams=5,
        early_stopping=True
    )
    captions = models['processor'].batch_decode(outputs, skip_special_tokens=True)

    # Render the detections onto the image and encode as PNG.
    img_with_boxes = results[0].plot()
    labeled_img = Image.fromarray(img_with_boxes)
    buffered = io.BytesIO()
    labeled_img.save(buffered, format="PNG")

    # Return RAW PNG bytes. The previous version base64-encoded here while
    # app.py base64-encoded the value again, producing a double-encoded
    # (unrenderable) image.
    return buffered.getvalue(), coordinates, captions
models.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoProcessor, AutoModelForCausalLM
2
+ import torch
3
+ from ultralytics import YOLO
4
+
5
def load_models(device='cpu'):
    """Initialize and load all required models.

    Args:
        device: torch device string ('cpu' or 'cuda') to place models on.

    Returns:
        dict with keys 'yolo_model', 'processor', 'caption_model'.
    """
    # Force float32 everywhere: float16 is poorly supported on CPU.
    torch.set_default_dtype(torch.float32)

    # Local detector weights shipped with the app (best.pt, git-lfs).
    yolo_model = YOLO('best.pt').to(device)

    processor = AutoProcessor.from_pretrained(
        "microsoft/Florence-2-base",
        trust_remote_code=True
    )

    # "microsoft/OmniParser/icon_caption_florence" is not a valid Hub repo id
    # (repo ids allow at most one "/"), so from_pretrained rejects it. The
    # caption weights live in the icon_caption_florence SUBFOLDER of the
    # microsoft/OmniParser repo.
    caption_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/OmniParser",
        subfolder="icon_caption_florence",
        torch_dtype=torch.float32,  # keep float32 for CPU inference
        trust_remote_code=True
    ).to(device)

    return {
        'yolo_model': yolo_model,
        'processor': processor,
        'caption_model': caption_model
    }
requirements.txt CHANGED
@@ -1,5 +1,18 @@
1
- safetensors
2
- torch
3
- requests
4
- fastapi
5
- uvicorn
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
fastapi==0.95.1
uvicorn==0.23.0
transformers==4.30.0
torch==2.1.0
Pillow==9.5.0
ultralytics>=8.0.0
numpy>=1.24.0
opencv-python>=4.5.5
click>=8.0.4
typing-extensions>=4.0.0
pydantic>=1.10.0
py7zr
dill
utils.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image
3
+ import io
4
+
5
def validate_image(file_content: bytes) -> "Image.Image":
    """Validate raw upload bytes and return a decoded RGB PIL image.

    Args:
        file_content: the raw bytes of the uploaded file.

    Returns:
        A PIL Image in RGB mode.

    Raises:
        ValueError: if the bytes are not a readable image.
    """
    try:
        # verify() checks integrity but leaves the file object unusable,
        # so the image must be reopened afterwards for real processing.
        image = Image.open(io.BytesIO(file_content))
        image.verify()
        image = Image.open(io.BytesIO(file_content))

        # Normalize every non-RGB mode (RGBA, LA, P, L, CMYK, ...) to RGB.
        # The original test `mode in ('RGBA', 'LA') or mode != 'RGB'` was
        # redundant — the second clause already implies the first.
        if image.mode != 'RGB':
            image = image.convert('RGB')

        return image
    except Exception as e:
        # Surface any decode/verify failure as a single client-facing error.
        raise ValueError(f"Invalid image file: {str(e)}")
23
+
24
def save_temp_image(image: "Image.Image", path: str) -> str:
    """Save *image* to *path*, creating parent directories as needed.

    Args:
        image: a PIL Image (anything exposing .save(path)).
        path: destination file path; may be a bare filename.

    Returns:
        The path the image was saved to.

    Raises:
        RuntimeError: if directory creation or saving fails.
    """
    try:
        # Only create directories when the path actually has a directory
        # component: os.makedirs('') raises FileNotFoundError, which the
        # original code turned into a spurious RuntimeError for bare
        # filenames like "out.png".
        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)

        image.save(path)
        return path
    except Exception as e:
        # Wrap any OS/PIL failure with the offending path for context.
        raise RuntimeError(f"Error saving image to {path}: {str(e)}")