Spaces:

miguelflor
/

limbsAI_API

Sleeping

App Files Files Community

Miguel Cid Flor committed on Feb 14, 2025

Commit

65f6a85

1 Parent(s): 92ca8c0

receiving an image and predicting it

Browse files

Files changed (5) hide show

Models.py +291 -0
PreProcessor.py +260 -0
app.py +41 -5
posev0.01126.pth +3 -0
yolov8n.pt +3 -0

Models.py ADDED Viewed

	@@ -0,0 +1,291 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[1]:
+import torch.nn as nn
+import torchvision.transforms as transforms
+# First Model
+# In[ ]:
+class PoseNetV1(nn.Module):
+    """Baseline CNN regressor: three conv/pool stages and a two-layer head.
+
+    The flattened feature size is hard-wired, so the network only accepts
+    inputs whose feature map is 28x28 after three 2x poolings (224x224
+    images). It emits 32 raw values per image (presumably 16 keypoints as
+    (x, y) pairs, like the later models in this file -- TODO confirm).
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Layer positions inside each Sequential define the state_dict keys,
+        # so the order below must not change.
+        self.conv = nn.Sequential(
+            nn.Conv2d(3, 32, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 224 -> 112
+            nn.Conv2d(32, 64, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 112 -> 56
+            nn.Conv2d(64, 128, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 56 -> 28
+        )
+        self.fc = nn.Sequential(
+            nn.Flatten(),
+            # 128 channels at 28x28 = 100352 flattened features.
+            nn.Linear(128 * 28 * 28, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, 32),
+        )
+
+    def forward(self, x):
+        """Run the conv stack, then the fully connected head."""
+        return self.fc(self.conv(x))
+# Here, we added one more layer and we added Dropout to the fully connected layer. We also added a Flatten layer to flatten the output of the convolutional layers before passing it to the fully connected layers.
+# In[ ]:
+class PoseNetV2(nn.Module):
+    """Four conv/pool stages followed by a dropout-regularised head.
+
+    The head's input size is fixed at 256 * 14 * 14, i.e. the network
+    expects 224x224 images. Output is 32 raw values per image.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Positions inside each Sequential define the state_dict keys.
+        self.conv = nn.Sequential(
+            nn.Conv2d(3, 32, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 224 -> 112
+            nn.Conv2d(32, 64, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 112 -> 56
+            nn.Conv2d(64, 128, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 56 -> 28
+            nn.Conv2d(128, 256, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 28 -> 14
+        )
+        self.fc = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(256 * 14 * 14, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, 32),
+        )
+
+    def forward(self, x):
+        """Run the conv stack, then the fully connected head."""
+        return self.fc(self.conv(x))
+# In[ ]:
+class PoseNetV3(nn.Module):
+    """Four conv/pool stages and a two-layer head producing 32 values.
+
+    NOTE(review): the layers are identical to PoseNetV2 as defined in this
+    file; whatever distinguished V3 is not visible in the architecture.
+    The head's input size (256 * 14 * 14) fixes the input at 224x224.
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Positions inside each Sequential define the state_dict keys.
+        self.conv = nn.Sequential(
+            nn.Conv2d(3, 32, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 224 -> 112
+            nn.Conv2d(32, 64, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 112 -> 56
+            nn.Conv2d(64, 128, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 56 -> 28
+            nn.Conv2d(128, 256, 3, padding=1),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 28 -> 14
+        )
+        self.fc = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(256 * 14 * 14, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, 32),
+        )
+
+    def forward(self, x):
+        """Run the conv stack, then the fully connected head."""
+        return self.fc(self.conv(x))
+# We added batch normalization in each layer, adaptive pooling, and a Tanh function at the end of the fully connected layers
+# In[ ]:
+class PoseNetV4(nn.Module):
+    """Batch-normalised four-stage CNN with adaptive pooling and a Tanh head.
+
+    Adaptive average pooling pins the feature map to 7x7, so the head's
+    input size does not depend on the image resolution. The final Tanh
+    bounds each of the 32 outputs to [-1, 1].
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Positions inside each Sequential define the state_dict keys.
+        self.conv = nn.Sequential(
+            nn.Conv2d(3, 32, 3, padding=1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 112x112 for a 224x224 input
+            nn.Conv2d(32, 64, 3, padding=1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 56x56
+            nn.Conv2d(64, 128, 3, padding=1),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 28x28
+            nn.Conv2d(128, 256, 3, padding=1),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            nn.AdaptiveAvgPool2d(7),  # always 7x7, whatever the input size
+        )
+        self.fc = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(256 * 7 * 7, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.4),  # heavier dropout than V1-V3 to curb overfitting
+            nn.Linear(512, 32),
+            nn.Tanh(),  # squashes keypoint predictions into [-1, 1]
+        )
+
+    def forward(self, x):
+        """Run the conv stack, then the fully connected head."""
+        return self.fc(self.conv(x))
+# 4 Layers -> 5 Layers
+#
+# Tanh() -> Sigmoid()
+# In[ ]:
+class PoseNetV5(nn.Module):
+    """Batch-normalised five-stage CNN with adaptive pooling and a Sigmoid head.
+
+    Adaptive average pooling pins the feature map to 7x7, so the head's
+    input size does not depend on the image resolution. The final Sigmoid
+    bounds each of the 32 outputs to (0, 1).
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Positions inside each Sequential define the state_dict keys.
+        self.conv = nn.Sequential(
+            nn.Conv2d(3, 32, 3, padding=1),
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 112x112 for a 224x224 input
+            nn.Conv2d(32, 64, 3, padding=1),
+            nn.BatchNorm2d(64),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 56x56
+            nn.Conv2d(64, 128, 3, padding=1),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 28x28
+            nn.Conv2d(128, 256, 3, padding=1),
+            nn.BatchNorm2d(256),
+            nn.ReLU(),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 14x14
+            nn.Conv2d(256, 512, 3, padding=1),
+            nn.BatchNorm2d(512),
+            nn.ReLU(),
+            nn.AdaptiveAvgPool2d(7),  # always 7x7, whatever the input size
+        )
+        self.fc = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(512 * 7 * 7, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.50),  # heavier dropout to curb overfitting
+            nn.Linear(512, 32),
+            nn.Sigmoid(),  # squashes keypoint predictions into (0, 1)
+        )
+
+    def forward(self, x):
+        """Run the conv stack, then the fully connected head."""
+        return self.fc(self.conv(x))
+# In[ ]:
+class ResidualBlock(nn.Module):
+    """Two 3x3 conv + batch-norm layers wrapped by a skip connection.
+
+    Spatial size is preserved (stride 1, padding 1). When the channel count
+    changes, the skip path is a 1x1 convolution plus batch norm so both
+    branches can be added; otherwise it is an empty Sequential (identity).
+    """
+
+    def __init__(self, in_channels, out_channels):
+        super().__init__()
+        # Attribute names are part of the checkpoint's state_dict keys.
+        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, padding=1)
+        self.bn1 = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, padding=1)
+        self.bn2 = nn.BatchNorm2d(out_channels)
+        if in_channels == out_channels:
+            # Identity mapping: an empty Sequential returns its input.
+            self.shortcut = nn.Sequential()
+        else:
+            # Project the input to the new channel count.
+            self.shortcut = nn.Sequential(
+                nn.Conv2d(in_channels, out_channels, 1),
+                nn.BatchNorm2d(out_channels),
+            )
+
+    def forward(self, x):
+        """Return relu(main_branch(x) + shortcut(x))."""
+        residual = self.shortcut(x)
+        out = self.relu(self.bn1(self.conv1(x)))
+        out = self.bn2(self.conv2(out))
+        out += residual
+        return self.relu(out)
+class ResPoseNet(nn.Module):
+    """Residual pose regressor: five ResidualBlocks and a two-layer head.
+
+    Adaptive average pooling pins the feature map to 7x7, so the head's
+    input size does not depend on the image resolution. The output is 32
+    values in (0, 1): 16 keypoints, each as normalised (x, y).
+    """
+
+    def __init__(self):
+        super().__init__()
+        # Positions inside each Sequential define the state_dict keys that
+        # a saved checkpoint is matched against; keep the order intact.
+        self.conv = nn.Sequential(
+            ResidualBlock(3, 32),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 112x112 for a 224x224 input
+            ResidualBlock(32, 64),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 56x56
+            ResidualBlock(64, 128),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 28x28
+            ResidualBlock(128, 256),
+            nn.MaxPool2d(kernel_size=2, stride=2),  # 14x14
+            ResidualBlock(256, 512),
+            nn.AdaptiveAvgPool2d(7),  # always 7x7, whatever the input size
+        )
+        self.fc = nn.Sequential(
+            nn.Flatten(),
+            nn.Linear(512 * 7 * 7, 1024),
+            nn.ReLU(),
+            nn.Dropout(p=0.40),
+            nn.Linear(1024, 32),  # 16 keypoints x (x, y)
+            nn.Sigmoid(),  # keypoint coordinates normalised to (0, 1)
+        )
+
+    def forward(self, x):
+        """Run the residual feature extractor, then the regression head."""
+        return self.fc(self.conv(x))
+# Preprocessing applied to an image before it is fed to a model:
+# ToTensor turns an HxWx3 uint8 image into a 3xHxW float tensor in [0, 1],
+# then Normalize with mean 0.5 / std 0.5 rescales each channel to [-1, 1].
+transform = transforms.Compose(
+    [
+        transforms.ToTensor(),
+        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
+    ]
+)

PreProcessor.py ADDED Viewed

	@@ -0,0 +1,260 @@

+#!/usr/bin/env python
+# coding: utf-8
+# In[58]:
+import matplotlib.pyplot as plt
+import pandas as pd
+import cv2
+import numpy as np
+from ultralytics import YOLO
+# Detector used by get_persons() to locate people; the weights file is
+# resolved relative to the working directory and loaded once at import time.
+yolo = YOLO("yolov8n.pt")
+# In[59]:
+#plt.imshow(cv2.imread("Datasets/images/000003072.jpg"))
+# In[60]:
+def resize_with_padding(points, image, target_size=(224, 224), padding_color=(0, 0, 0)):
+    """Letterbox ``image`` into ``target_size`` and move ``points`` with it.
+
+    The image is scaled uniformly so it fits inside the target, then centred
+    on a canvas filled with ``padding_color``.
+
+    Args:
+        points: iterable of (x, y) pixel coordinates in ``image``.
+        image: H x W x 3 array.
+        target_size: (width, height) of the output canvas.
+        padding_color: 3-tuple used to fill the border.
+
+    Returns:
+        (padded_image, points, reverse) where ``points`` are the input points
+        mapped onto the canvas and ``reverse(lm, bm, x, y)`` maps a canvas
+        point back to ``image`` coordinates and then adds the offsets
+        ``(lm, bm)`` to x and y respectively.
+
+    Raises:
+        ValueError: if ``image`` has zero height or width.
+    """
+    h, w = image.shape[:2]
+    if h == 0 or w == 0:
+        # Without this guard the scale computation divides by zero.
+        raise ValueError(f"cannot resize an empty image of shape {image.shape}")
+    target_w, target_h = target_size
+    # Uniform scale that makes the image fit inside the target in both axes.
+    scale = min(target_w / w, target_h / h)
+    # Clamp to one pixel: an extreme aspect ratio could otherwise round a
+    # side down to 0, which cv2.resize rejects.
+    new_w = max(1, int(w * scale))
+    new_h = max(1, int(h * scale))
+    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)
+    # Offsets that centre the resized image on the canvas.
+    x_offset = (target_w - new_w) // 2
+    y_offset = (target_h - new_h) // 2
+    padded_image = np.full((target_h, target_w, 3), padding_color, dtype=np.uint8)
+    padded_image[y_offset:y_offset + new_h, x_offset:x_offset + new_w] = resized
+    # Apply the same scale + shift to the annotated points.
+    points = [(int(x * scale + x_offset), int(y * scale + y_offset)) for x, y in points]
+
+    def reverse(lm, bm, x, y):
+        """Undo the letterboxing for one point, then shift by (lm, bm)."""
+        return int((x - x_offset) / scale) + lm, int((y - y_offset) / scale) + bm
+
+    return padded_image, points, reverse
+# In[61]:
+def get_persons(image, points):
+    """Pick the detected person box that contains the most of ``points``.
+
+    Runs the module-level YOLO detector on ``image`` and considers only
+    detections of class 0 ("person" in COCO).
+
+    Returns:
+        (x1, y1, x2, y2) of the chosen box. The first person found is the
+        default and is only replaced by a box containing strictly more
+        points, so with no points the first person wins. If nobody is
+        detected the result is (0, 0, 0, 0).
+    """
+    detections = yolo(image)
+    best_box = (0, 0, 0, 0)
+    best_hits = 0
+    found_person = False
+    for detection in detections:
+        for box in detection.boxes:
+            if int(box.cls[0].item()) != 0:
+                continue  # not a person
+            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
+            if not found_person:
+                best_box = (x1, y1, x2, y2)
+                found_person = True
+            # Number of annotated points that fall inside this box.
+            hits = sum(1 for px, py in points if x1 <= px <= x2 and y1 <= py <= y2)
+            if hits > best_hits:
+                best_hits = hits
+                best_box = (x1, y1, x2, y2)
+    return best_box
+# In[62]:
+def transform_data(name, points):
+    """Crop the detected person out of an image and letterbox the crop.
+
+    Args:
+        name: an image file name/path (str) or an already decoded image
+            array in OpenCV's BGR channel order.
+        points: (x, y) keypoints in original-image pixels. For a string
+            ``name``, an empty list means ``name`` is used as the path
+            as-is; otherwise it is looked up under ``Datasets/images/``.
+
+    Returns:
+        (padded_image, new_points, reverse_complete): the RGB crop resized
+        by ``resize_with_padding``, the keypoints mapped onto it, and a
+        function ``reverse_complete(x, y)`` that maps a point on the padded
+        image back to original-image coordinates.
+
+    Raises:
+        FileNotFoundError: if a string ``name`` cannot be read as an image.
+        ValueError: if no person box with a non-empty area was found.
+    """
+    if isinstance(name, str):
+        # The path has to be resolved before it is read.
+        path = name if len(points) == 0 else "Datasets/images/" + name
+        image = cv2.imread(path)
+        if image is None:
+            # cv2.imread signals failure by returning None, not by raising.
+            raise FileNotFoundError(f"could not read image: {path}")
+    else:
+        image = name
+    # Box corners: (x1, y1) is the top-left, (x2, y2) the bottom-right.
+    x1, y1, x2, y2 = get_persons(image, points)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    # Keep the box inside the image bounds.
+    x1 = max(x1, 0)
+    y1 = max(y1, 0)
+    x2 = min(x2, image.shape[1])
+    y2 = min(y2, image.shape[0])
+    if x2 <= x1 or y2 <= y1:
+        # get_persons returns (0, 0, 0, 0) when nobody is detected; fail
+        # here with a clear message instead of deep inside the resize.
+        raise ValueError("no person bounding box found in image")
+    image = image[y1:y2, x1:x2]
+    # Express the keypoints relative to the crop's top-left corner.
+    points = [(x - x1, y - y1) for x, y in points]
+    padded_image, new_points, reverse = resize_with_padding(points, image)
+
+    def reverse_complete(x, y):
+        """Map a padded-image point back to original-image coordinates."""
+        return reverse(x1, y1, x, y)
+
+    return padded_image, new_points, reverse_complete
+    # Plot image
+# In[13]:
+# In[63]:
+#df = pd.read_csv('./Datasets/mpii_human_pose.csv')
+#df = df[df["NAME"]=="000003072.jpg"]
+# Load image using OpenCV (convert BGR to RGB for Matplotlib)
+# (x column, y column) name pairs for the 16 annotated joints. process_row()
+# reads and writes the row through these names and relies on this exact
+# order (index 0 is the right ankle ... index 15 is the left wrist).
+# Presumably these are the column names of the MPII annotation CSV --
+# TODO confirm against the dataset file.
+keypoints = [
+    ("r ankle_X", "r ankle_Y"),
+    ("r knee_X", "r knee_Y"),
+    ("r hip_X", "r hip_Y"),
+    ("l hip_X", "l hip_Y"),
+    ("l knee_X", "l knee_Y"),
+    ("l ankle_X", "l ankle_Y"),
+    ("pelvis_X", "pelvis_Y"),
+    ("thorax_X", "thorax_Y"),
+    ("upper neck_X", "upper neck_Y"),
+    ("head top_X", "head top_Y"),
+    ("r wrist_X", "r wrist_Y"),
+    ("r elbow_X", "r elbow_Y"),
+    ("r shoulder_X", "r shoulder_Y"),
+    ("l shoulder_X", "l shoulder_Y"),
+    ("l elbow_X", "l elbow_Y"),
+    ("l wrist_X", "l wrist_Y")
+]
+## Select the first row (example: first image)
+#row = df.iloc[0]  # Change index for other images
+#
+## Convert keypoints into a list of (x, y) tuples
+#points = [(int(row[x]), int(row[y])) for x, y in keypoints]
+#image,points,reverse = transform_data("000003072.jpg",points)
+## Plot image
+#plt.imshow(image)
+#for (x, y) in points:
+#    plt.scatter(x, y, color="red", s=30)  # Red points
+#
+#plt.show()
+# In[64]:
+#original_points = [reverse(x,y) for x, y in points]
+#plt.imshow(cv2.imread("Datasets/images/000003072.jpg"))
+#for (x, y) in original_points:
+#    plt.scatter(x, y, color="red", s=30)  # Red points
+#
+#plt.show()
+# In[65]:
+# change the dataset using a function that will return the image with the alterations and the new points
+# (stray error text that had been pasted into the comment above: numpy.core._exceptions._UFuncNoLoopError: ufunc 'add' did not contain a loop with signature matching types (dtype('<U16'), dtype('uint8')) -> None)
+def process_row(row):
+    """Replace one annotation row's keypoints with their letterboxed values.
+
+    Reads the joint coordinates named in ``keypoints`` and the image name
+    from ``row["NAME"]``, runs ``transform_data`` on them, writes the
+    transformed coordinates back into the same columns and stores the
+    processed image in ``row["image"]``.
+
+    Returns:
+        The updated row. If the transformation fails for any reason the
+        error is printed, ``row["image"]`` is set to None and the
+        coordinates are left untouched, so one bad image does not abort a
+        whole dataset run.
+    """
+    points = [(int(row[x]), int(row[y])) for x, y in keypoints]
+    try:
+        image, points, _ = transform_data(row["NAME"], points)
+    except Exception as e:  # deliberate best-effort: flag the row and go on
+        print(f"Error processing row {row['ID']}: {e}")
+        row["image"] = None
+        return row
+    # Write back through the same table the points were read from, so the
+    # column list exists in exactly one place and cannot drift.
+    for (x_col, y_col), (px, py) in zip(keypoints, points):
+        row[x_col], row[y_col] = px, py
+    row["image"] = image
+    return row
+# In[66]:
+def process_dataset(name, df=None, numberRows=None):
+    """Build the processed training table from an annotation CSV.
+
+    Args:
+        name: path of the CSV file to read.
+        df: ignored -- the table is always re-read from ``name``. Kept only
+            so existing positional calls keep working.
+        numberRows: maximum number of rows to process after filtering;
+            None (the default) processes every remaining row.
+
+    Returns:
+        The processed DataFrame. As a side effect it is also pickled to
+        ``dataset<row count>.pkl`` in the working directory.
+    """
+    df = pd.read_csv(name)
+    # Drop every row in which any column equals -1 (presumably the marker
+    # for a missing annotation -- TODO confirm against the dataset).
+    df = df[(df != -1).all(axis=1)]
+    # Slow step: every row runs person detection (about 1h30 on the
+    # author's machine for the full dataset).
+    df = df[:numberRows].apply(process_row, axis=1)
+    df.to_pickle('dataset' + str(df.shape[0]) + '.pkl')
+    return df
+# In[ ]:
+#newDF = process_dataset("./Datasets/mpii_human_pose.csv")
+# In[ ]:
+#row = newDF.iloc[5]  # Change index for other images
+## Convert keypoints into a list of (x, y) tuples
+#points = [(int(row[x]), int(row[y])) for x, y in keypoints]
+#plt.imshow(row["image"])
+#for (x, y) in points:
+#    plt.scatter(x, y, color="red", s=30)  # Red points
+# In[ ]:
+#get all rows that have image null
+#df_nulls = newDF[newDF["image"].isnull()]
+## count how many null images it has
+#print(df_nulls.shape)
+#row = df_nulls.iloc[0]  # Change index for other images
+#points = [(int(row[x]), int(row[y])) for x, y in keypoints]
+#print(get_persons(cv2.imread("./Datasets/images/"+row["NAME"]),points))
+#plt.imshow(cv2.imread("./Datasets/images/"+row["NAME"]))
+#for (x, y) in points:
+#    plt.scatter(x, y, color="red", s=30)
+# In[47]:
+#df5000 = pd.read_pickle('dataset5000.pkl')
+#df6231 = pd.read_pickle('dataset6231.pkl')
+#df = pd.concat([df5000, df6231], ignore_index=True)
+#df.to_pickle('dataset11231.pkl')

app.py CHANGED Viewed

@@ -1,7 +1,43 @@
-from fastapi import FastAPI
 app = FastAPI()
-@app.get("/")
-def greet_json():
-    return {"Hello": "World!"}

+from fastapi import FastAPI, Request
+import cv2
+import numpy as np
+import base64
+from io import BytesIO
+from PIL import Image
+import torch  # or tensorflow
+from Models import ResPoseNet,transform
+from PreProcessor import transform_data
 app = FastAPI()
+# Build the pose network once at import time and restore its trained weights
+# from the checkpoint file (path relative to the working directory);
+# map_location places the loaded tensors on the CPU.
+model = ResPoseNet()
+model.load_state_dict(torch.load('posev0.01126.pth', map_location=torch.device('cpu')))
+def predict_keypoints(image, model):
+    """Run ``model`` on one image and return its keypoints as (x, y) pairs.
+
+    The image is preprocessed with the module-level ``transform`` and given
+    a batch dimension. The model output is multiplied by 224 (the side of
+    the letterboxed input) and consecutive values are paired up as
+    (x, y) Python floats.
+    """
+    model.eval()  # inference mode: disables dropout, uses batch-norm running stats
+    with torch.no_grad():
+        batch = transform(image).unsqueeze(0)
+        scaled = model(batch) * 224
+    flat = scaled.squeeze()
+    # Even positions hold x values, odd positions the matching y values.
+    xs = flat[0::2].tolist()
+    ys = flat[1::2].tolist()
+    return list(zip(xs, ys))
+def decode_base64_image(data):
+    """Decode a base64-encoded image string into an H x W x 3 RGB array.
+
+    Args:
+        data: either a data URL ("data:image/png;base64,<payload>") or the
+            bare base64 payload.
+
+    Returns:
+        numpy array in RGB channel order. Images with an alpha channel, a
+        palette or a single grey channel are converted to RGB so callers
+        always get three channels.
+    """
+    # The base64 alphabet has no comma, so a comma can only be the
+    # separator between a data-URL header and the payload.
+    _, separator, tail = data.partition(",")
+    encoded = tail if separator else data
+    img = Image.open(BytesIO(base64.b64decode(encoded)))
+    return np.array(img.convert("RGB"))
+@app.post("/predict")
+async def predict(request: Request):
+    """Predict body keypoints for a base64-encoded image.
+
+    Expects a JSON object with an ``image`` field holding the encoded
+    picture. Responds with ``{"keypoints": [[x, y], ...]}`` in the pixel
+    coordinates of the submitted image.
+
+    Raises:
+        HTTPException: 400 when the body has no ``image`` field.
+    """
+    # Imported here so this handler does not depend on the file's import block.
+    from fastapi import HTTPException
+
+    data = await request.json()
+    if not isinstance(data, dict) or "image" not in data:
+        raise HTTPException(status_code=400, detail="JSON body must contain an 'image' field")
+    img = decode_base64_image(data["image"])
+    # The decoded array comes from PIL (RGB order) but transform_data treats
+    # its input as OpenCV BGR and swaps the channels; convert first so the
+    # model ends up seeing real RGB.
+    if img.ndim == 2:
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    elif img.shape[2] == 4:
+        img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)
+    else:
+        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+    processed, _, reverse = transform_data(img, [])
+    # Call the inference helper, not this route handler itself.
+    results = predict_keypoints(processed, model)
+    # Map each point from the 224x224 crop back to the submitted image.
+    keypoints = [reverse(x, y) for x, y in results]
+    # Already a plain list of int pairs, so it serialises to JSON as-is.
+    return {"keypoints": keypoints}

posev0.01126.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d634be5179db3e6d93307c7bcf50d22d343db094c9aa94df34b6eecee4741db
+size 122529293

yolov8n.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f59b3d833e2ff32e194b5bb8e08d211dc7c5bdf144b90d2c8412c47ccfc83b36
+size 6549796