Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python | |
| ############################################################################# | |
| # | |
| # This is the bulk of the logic for the gradio demo. You can use it for | |
| # whatever you want. Credit would be nice, but it is not required. | |
| # | |
| # You can also run it on an image from the cli | |
| # | |
| # TODO: | |
| # | |
| # 1. rework the classes that just wrap Dict and List to extend them | |
| # 2. cleanup all the to_dict madness | |
| # 3. convert the print calls to use the logging | |
| # 4. add a proper creative commons license | |
| # 5. cleanup string constants | |
| # 6. replace custom code with libraries where possible, e.g. for OBJ export | |
| # | |
| ############################################################################# | |
| import cv2 | |
| import json | |
| import logging | |
| import mediapipe as mp | |
| import numpy as np | |
| import os | |
| import sys | |
| import torch | |
| from mediapipe.framework.formats import landmark_pb2 | |
| from mediapipe.python.solutions.drawing_utils import _normalized_to_pixel_coordinates | |
| from PIL import Image, ImageDraw | |
| from transformers import DPTFeatureExtractor, DPTForDepthEstimation | |
| from typing import List, Mapping, Optional, Tuple, Union, Dict, Type | |
| from utils import colorize | |
| from quads import QUADS | |
# Shortcuts into the MediaPipe solutions API used throughout this file.
mp_face_mesh = mp.solutions.face_mesh
mp_drawing = mp.solutions.drawing_utils
mp_drawing_styles = mp.solutions.drawing_styles

# Annotation alias for images held as numpy arrays. It names the array type
# itself (np.ndarray); typing.Type[np.ndarray] would describe the ndarray
# *class object*, which is not what the annotated parameters receive.
NumpyImage = np.ndarray

# Torch device used for the depth model that is explicitly moved to a device.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Default for the hf_hack flag of the toObj methods. When true, the OBJ output
# comments out the mtllib line and the uv-indexed face lines.
HF_HACK = True
class Point3:
    """Three numeric components (a position or an RGB color) kept in a list.

    ``values`` stores the components in ``[x, y, z]`` order.  ``x``, ``y`` and
    ``z`` are read-only properties, because the rest of this file reads them
    as plain attributes (for example ``point.position.z``).
    """

    def __init__(self, values: Optional[List[float]] = None):
        # A list literal in the signature would be created once and shared by
        # every default-constructed instance, so build a fresh one here.
        self.values = [0, 0, 0] if values is None else values

    @property
    def x(self):
        """First component."""
        return self.values[0]

    @property
    def y(self):
        """Second component."""
        return self.values[1]

    @property
    def z(self):
        """Third component."""
        return self.values[2]

    def to_dict(self):
        """Return the components as ``{'x': ..., 'y': ..., 'z': ...}``."""
        first, second, third = self.values[0], self.values[1], self.values[2]
        return {'x': first, 'y': second, 'z': third}
class TextureCoordinate:
    """A texture (uv) coordinate kept in a two-element list ``[u, v]``.

    ``u`` and ``v`` are read-only properties so they can be read as plain
    attributes, matching how the sibling coordinate classes are used.
    """

    def __init__(self, values: Optional[List[float]] = None):
        # Fresh list per instance; a default list in the signature is shared.
        self.values = [0, 0] if values is None else values

    @property
    def u(self):
        """First component."""
        return self.values[0]

    @property
    def v(self):
        """Second component."""
        return self.values[1]

    def to_dict(self):
        """Return the components as ``{'u': ..., 'v': ...}``."""
        return {'u': self.values[0], 'v': self.values[1]}
class PixelCoordinate:
    """An integer pixel position kept in a two-element list ``[x, y]``.

    ``x`` and ``y`` are read-only properties so they can be used directly as
    array indices (``image[pc.y, pc.x]``).
    """

    def __init__(self, values: Optional[List[int]] = None):
        # Fresh list per instance; a default list in the signature is shared.
        self.values = [0, 0] if values is None else values

    @property
    def x(self):
        """Column index."""
        return self.values[0]

    @property
    def y(self):
        """Row index."""
        return self.values[1]

    def to_dict(self):
        """Return the position as ``{'x': ..., 'y': ...}``."""
        return {'x': self.values[0], 'y': self.values[1]}
class DepthMap:
    """Per-point depth values keyed by the name of the source that made them."""

    # Key under which the depth taken from the MediaPipe landmark is stored.
    MEDIA_PIPE = 'mediapipe'

    def __init__(self, values: Optional[Dict[str, float]] = None):
        # Depth sources add keys to this dict in place, so every instance
        # needs its own dict rather than one shared signature default.
        self.values = {'og': 0} if values is None else values

    def to_dict(self):
        """Return the underlying dict itself (not a copy)."""
        return self.values
class DepthMapping:
    """Linear remapping of one depth source plus the weight given to it.

    ``lo``/``hi`` describe the observed input range (grown with ``track``),
    ``toLo``/``toHi`` the output range, and ``weight`` scales the translated
    value.  ``diff`` and ``toDiff`` cache the two range widths and are only
    refreshed by ``update``.
    """

    def __init__(self, weight: float = 1, lo: float = +np.inf, hi: float = -np.inf, toLo: float = 0, toHi: float = 1):
        self.weight = weight
        self.lo = lo
        self.hi = hi
        self.toLo = toLo
        self.toHi = toHi
        self.diff = 1
        self.toDiff = 1
        self.update()

    def reset(self):
        """Forget the observed input range (``diff`` is left untouched)."""
        self.lo = +np.inf
        self.hi = -np.inf

    def track(self, value):
        """Grow the observed input range so that it includes ``value``."""
        self.lo = min(self.lo, value)
        self.hi = max(self.hi, value)

    def update(self):
        """Recompute the cached range widths and return ``self``."""
        self.diff = self.hi - self.lo
        self.toDiff = self.toHi - self.toLo
        return self

    def translate(self, value):
        """Map ``value`` from the input range to the weighted output range.

        The value is normalised by the input range only when that range is
        usable.  A zero-width range, or one that was never tracked (``diff``
        is then infinite), skips the normalisation instead of dividing by it,
        which would produce NaN.
        """
        if self.diff != 0 and np.isfinite(self.diff):
            value = (value - self.lo) / self.diff
        value = self.toLo + value * self.toDiff
        return value * self.weight

    def to_dict(self):
        """Return every field as a plain dict."""
        return {
            'weight': self.weight,
            'lo': self.lo,
            'hi': self.hi,
            'toLo': self.toLo,
            'toHi': self.toHi,
            'diff': self.diff,
            'toDiff': self.toDiff,
        }
class WeightMap:
    """Named collection of DepthMapping objects, one per depth source."""

    def __init__(self, values: 'Dict[str, DepthMapping]' = None):
        if values is None:
            self.values = {DepthMap.MEDIA_PIPE: DepthMapping()}
        else:
            self.values = values

    def set(self, key: str, depthMapping: 'DepthMapping'):
        """Store ``depthMapping`` under ``key``, replacing any previous one."""
        self.values[key] = depthMapping

    def totally(self, name: str):
        """Give ``name`` a weight of 1 and every other mapping a weight of 0.

        Raises:
            Exception: if there is no mapping called ``name``.
        """
        if name not in self.values:
            raise Exception(f'no weight for {name} in {self.to_dict()}')
        for depthMapping in self.values.values():
            depthMapping.weight = 0
        self.values[name].weight = 1

    def saveWeights(self) -> Dict[str, float]:
        """Return a snapshot of the current weights keyed by mapping name."""
        return {k: v.weight for k, v in self.values.items()}

    def loadWeights(self, weights: Dict[str, float]):
        """Apply weights previously returned by ``saveWeights``.

        All keys are checked before anything is changed, so a bad key does
        not leave the map half-updated.

        Raises:
            Exception: if a key in ``weights`` has no mapping.
        """
        for k in weights:
            if k not in self.values:
                raise Exception(f'no weight for {k} in {self.to_dict()}')
        for k, weight in weights.items():
            self.values[k].weight = weight

    def to_dict(self):
        """Return ``{name: mapping.to_dict()}`` for every mapping."""
        return {k: dm.to_dict() for k, dm in self.values.items()}
class MeshPoint:
    """One mesh vertex: position, color, uv, source pixel and per-source depth."""

    def __init__(self,
                 position: Point3 = None,
                 color: Point3 = None,
                 textureCoordinate: TextureCoordinate = None,
                 pixelCoordinate: PixelCoordinate = None,
                 depthMap: DepthMap = None,
                 ):
        # Defaults are created per instance. Default *instances* in the
        # signature would be shared by every MeshPoint, and weighDepth
        # mutates position.values in place.
        self.position = Point3() if position is None else position
        self.color = Point3() if color is None else color
        self.textureCoordinate = TextureCoordinate() if textureCoordinate is None else textureCoordinate
        self.pixelCoordinate = PixelCoordinate() if pixelCoordinate is None else pixelCoordinate
        if depthMap is None:
            self.depthMap = DepthMap({DepthMap.MEDIA_PIPE: self.position.values[2]})
        else:
            self.depthMap = depthMap

    def to_dict(self):
        """Return the point as nested plain dicts."""
        derp = {
            'position': self.position.to_dict(),
            'color': self.color.to_dict(),
            'textureCoordinate': self.textureCoordinate.to_dict(),
            'pixelCoordinate': self.pixelCoordinate.to_dict(),
        }
        if self.depthMap is not None:
            derp['depthMap'] = self.depthMap.to_dict()
        return derp

    def weighDepth(self, weightMap: WeightMap = None):
        """Blend the per-source depths into ``position.values[2]``.

        Each mapping in ``weightMap`` translates this point's depth for the
        same key; the translated values are summed and divided by the sum of
        the weights.

        Raises:
            Exception: if ``weightMap`` has a key missing from the depth map.
            ZeroDivisionError: if the weights sum to zero.
        """
        if weightMap is None:
            weightMap = WeightMap()
        mappings = weightMap.values
        total_sum = sum(dm.weight for dm in mappings.values())
        blended = 0
        for key, depthMapping in mappings.items():
            if key not in self.depthMap.values:
                raise Exception(f'{key} from weightMap not in depthMap')
            blended = blended + depthMapping.translate(self.depthMap.values[key])
        self.position.values[2] = blended / total_sum

    def mapLandMark(self, mediaMesh: 'MediaMesh', landmark: landmark_pb2.NormalizedLandmark) -> 'MeshPoint':
        """Fill this point from one normalized landmark and return ``self``.

        Reads ``width``, ``height``, ``scale`` and ``image`` from ``mediaMesh``.
        """
        width, height = mediaMesh.width, mediaMesh.height
        pixel = _normalized_to_pixel_coordinates(landmark.x, landmark.y, width, height)
        if pixel is None:
            # mediapipe returns None for coordinates outside [0, 1] (a face
            # partly off the image); clamp to the nearest valid pixel instead
            # of failing on the tuple unpack.
            x = int(np.clip(np.floor(landmark.x * width), 0, width - 1))
            y = int(np.clip(np.floor(landmark.y * height), 0, height - 1))
        else:
            x, y = pixel
        raw = [landmark.x, landmark.y, landmark.z]
        self.position = Point3([v * mediaMesh.scale[i] for i, v in enumerate(raw)])
        # Channel values are divided by 255 (assumes 8-bit channels — TODO confirm).
        self.color = Point3([float(value) / 255 for value in mediaMesh.image[y, x]])
        # v is flipped relative to the image row index.
        self.textureCoordinate = TextureCoordinate([x / width, 1 - y / height])
        self.pixelCoordinate = PixelCoordinate([x, y])
        # Store the depth value itself; index the list so this does not depend
        # on how Point3 exposes its z accessor.
        self.depthMap = DepthMap({DepthMap.MEDIA_PIPE: self.position.values[2]})
        return self

    def toObj(self, lines: List[str], hf_hack: bool = HF_HACK):
        """Append this vertex to ``lines`` as OBJ ``v`` and ``vt`` records.

        The ``v`` record carries position followed by color.  ``hf_hack`` is
        accepted for signature parity with the other toObj methods and is not
        used here.
        """
        vertex_fields = self.position.values + self.color.values
        lines.append("v " + " ".join(map(str, vertex_fields)))
        lines.append("vt " + " ".join(map(str, self.textureCoordinate.values)))
# IMPORTANT! MeshFace uses 1 based indices, not 0 based!!!!
class MeshFace:
    """A polygon face given by 1-based vertex indices, plus its normal."""

    def __init__(self, indices: List[int] = None, normal: Point3 = None):
        self.indices = indices
        # Fresh Point3 per face; a default instance in the signature would be
        # shared by every face built without an explicit normal.
        self.normal = Point3() if normal is None else normal

    def calculateNormal(self, meshPoints: List[MeshPoint]):
        """Set ``self.normal`` from the first three vertices of the face.

        The normal is the normalised cross product of the two edges leaving
        the first vertex.  A degenerate face (zero-length cross product) keeps
        the zero vector rather than being divided into NaNs.

        Raises:
            Exception: if indices or meshPoints are missing, or the face has
                fewer than three indices.
        """
        if self.indices is None:
            raise Exception('indices is junk')
        if meshPoints is None:
            raise Exception('meshPoints is junk')
        if len(self.indices) < 3:
            raise Exception('need at least 3 points')
        # Indices are 1-based, hence the -1.
        corners = [np.array(meshPoints[index - 1].position.values) for index in self.indices[:3]]
        normal = np.cross(corners[1] - corners[0], corners[2] - corners[0])
        length = np.linalg.norm(normal)
        if length > 0:
            normal = normal / length
        self.normal = Point3(normal.tolist())

    def toObj(self, lines: List[str], index: int, hf_hack: bool = HF_HACK):
        """Append this face to ``lines`` as OBJ ``vn`` and ``f`` records.

        ``index`` is the normal index used in the ``v/vt/vn`` triples.  With
        ``hf_hack`` the triple form is written commented out (``#f ...``) and
        followed by a face line that lists vertex indices only.
        """
        lines.append("vn " + " ".join(str(value) for value in self.normal.values))
        face_uv = "f " + " ".join(f'{vertex}/{vertex}/{index}' for vertex in self.indices)
        if not hf_hack:
            lines.append(face_uv)
            return
        face_un = "f " + " ".join(str(vertex) for vertex in self.indices)
        lines.append(f'#{face_uv}')
        lines.append(f'{face_un}')
class DepthSource:
    """Base class for something that supplies a depth image for a MediaMesh.

    Subclasses set ``self.depth`` in ``mapDepth`` and then call ``_addDepth``.
    """

    def __init__(self, name: str = None):
        self.name = name
        self.mediaMesh = None
        self.depth: 'NumpyImage' = None
        self.gray: 'NumpyImage' = None

    def mapDepth(self, mediaMesh: 'MediaMesh', depthMapping: 'DepthMapping' = None) -> 'DepthSource':
        """Base implementation: does nothing and returns ``self``."""
        return self

    def _addDepth(self, mediaMesh: 'MediaMesh', depthMapping: 'DepthMapping' = None) -> 'DepthSource':
        """Copy ``self.depth`` onto the mesh points and register the mapping.

        For every mesh point the depth at its pixel is stored in the point's
        depth map under ``self.name``.  The mapping returned by
        ``createDepthMapping`` is then set on the mesh's weight map, and
        ``self.gray`` is rendered from the mesh with ``drawGrayMesh``.
        """
        self.mediaMesh = mediaMesh
        for meshPoint in mediaMesh.points:
            # pixelCoordinate.values is [x, y]; the depth array is indexed [row, column].
            column, row = meshPoint.pixelCoordinate.values[0], meshPoint.pixelCoordinate.values[1]
            #depth = -depth # lazy conversion from depth to position
            meshPoint.depthMap.values[self.name] = float(self.depth[row, column])
        mediaMesh.weightMap.set(self.name, self.createDepthMapping(depthMapping))
        self.gray = mediaMesh.drawGrayMesh(self.name, True)
        return self

    # note: if depthMapping is passed in, the hi and lo will be reset
    def createDepthMapping(self, depthMapping: 'DepthMapping' = None) -> 'DepthMapping':
        """Return a mapping whose lo/hi cover the depths under the mesh points.

        A new DepthMapping is created when none is passed in.  Nothing is
        tracked until both a depth image and a mesh are available, so this is
        safe to call before ``mapDepth``.
        """
        if depthMapping is None:
            depthMapping = DepthMapping()
        depthMapping.reset()
        if self.depth is not None and self.mediaMesh is not None:
            for meshPoint in self.mediaMesh.points:
                column, row = meshPoint.pixelCoordinate.values[0], meshPoint.pixelCoordinate.values[1]
                depthMapping.track(float(self.depth[row, column]))
        return depthMapping.update()
class ZoeDepthSource( DepthSource ):
    """Depth source backed by the ZoeDepth ``ZoeD_K`` model from torch.hub."""

    NAME = 'zoe'

    def __init__(self):
        super().__init__(ZoeDepthSource.NAME)
        # Load the pretrained model, move it to DEVICE and switch to eval mode.
        zoe = torch.hub.load('isl-org/ZoeDepth', "ZoeD_K", pretrained=True)
        self.model = zoe.to(DEVICE).eval()

    def mapDepth(self, mediaMesh:'MediaMesh', depthMapping:DepthMapping=None) -> 'DepthSource':
        """Infer depth for ``mediaMesh.image`` and attach it to the mesh.

        The stored depth is ``1 - prediction``.
        """
        inferred = self.model.infer_pil(mediaMesh.image)
        self.depth = 1. - inferred
        return self._addDepth(mediaMesh, depthMapping)
class MidasDepthSource( DepthSource ):
    """Depth source backed by the ``Intel/dpt-large`` DPT checkpoint."""

    NAME = 'midas'

    def __init__(self):
        super().__init__(MidasDepthSource.NAME)
        self.feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-large")
        self.model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

    def mapDepth(self, mediaMesh:'MediaMesh', depthMapping:DepthMapping=None) -> 'DepthSource':
        """Predict depth for ``mediaMesh.image`` and attach it to the mesh.

        The prediction is resized back to the image size with bicubic
        interpolation before it is stored as a numpy array.
        """
        pil_image = Image.fromarray(mediaMesh.image)
        model_inputs = self.feature_extractor(pil_image, return_tensors="pt")
        with torch.no_grad():
            raw_depth = self.model(**model_inputs).predicted_depth
        # PIL reports (width, height); interpolate wants (height, width).
        width, height = pil_image.size
        resized = torch.nn.functional.interpolate(
            raw_depth.unsqueeze(1),
            size=(height, width),
            mode="bicubic",
            align_corners=False,
        )
        self.depth = resized.squeeze().cpu().numpy()
        return self._addDepth(mediaMesh, depthMapping)
| ############################################################################# | |
| # | |
| # A MediaMesh has: | |
| # | |
| # 1. an input image | |
| # 2. the landmarks of the first face found | |
| # 3. a MeshPoint for each point | |
| # | |
| # | |
| # | |
| ############################################################################# | |
class MediaMesh:
    """Face mesh detected in one image, with weighted depth and OBJ export.

    ``detect`` runs MediaPipe FaceMesh and creates a MeshPoint per landmark.
    Depth sources add further depth values per point, the weight map decides
    how they are blended, and ``toObj`` writes the result as OBJ/MTL text.
    """

    LOG = logging.getLogger(__name__)
    COMBINED = 'combined'

    def __init__(self, scale:List[int] = None, weightMap:WeightMap = None, image:NumpyImage = None, annotated:NumpyImage = None, points:List[MeshPoint] = None):
        # detect() writes into scale[0], so each instance owns its own list;
        # a list default in the signature would be shared and modified.
        self.scale = [-1, -1, -1] if scale is None else list(scale)
        self.weightMap = WeightMap() if weightMap is None else weightMap
        self.image = image
        self.annotated = annotated
        self.points = points
        self.meshes = {}
        # A list of DepthSource objects, as assigned by demoSetup.
        self.depthSources = []
        # Filled in by detect().
        self.width = None
        self.height = None
        self.ratio = None
        self.gray = None

    # after this call, instance variables for image, annotated and points should be set
    def detect(self, image:NumpyImage, min_detection_confidence:float = .5) -> 'MediaMesh':
        """Find a face in ``image`` and build the mesh points from it.

        Also sets width, height, ratio, ``scale[0]`` (to the ratio), the
        annotated and gray preview images, and the MediaPipe depth mapping.
        The image is converted with COLOR_BGR2RGB before detection, so it is
        presumably expected in BGR order, as cv2.imread returns it.

        Raises:
            Exception: if no face is found.
        """
        self.image = image
        self.annotated = image.copy()
        self.points = None
        self.height, self.width = image.shape[0], image.shape[1]
        self.ratio = self.width / self.height
        self.scale[0] = self.ratio

        with mp_face_mesh.FaceMesh(
                static_image_mode=True,
                max_num_faces=1,
                min_detection_confidence=min_detection_confidence) as face_mesh:
            results = face_mesh.process(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

        faces = results.multi_face_landmarks
        if not faces:
            raise Exception( 'no faces found' )

        # just do the first face for now
        self.points = self.mapLandMarks(faces[0])
        for landmarks in faces:
            self.drawLandMarks(self.annotated, landmarks)

        self.gray = self.drawGrayMesh()
        self.weightMap.set( DepthMap.MEDIA_PIPE, self.createDepthMapping() )
        return self

    def drawLandMarks(self, image:NumpyImage, landmarks: landmark_pb2.NormalizedLandmarkList):
        """Draw the tesselation and then the contours of ``landmarks`` onto ``image``."""
        passes = (
            (mp_face_mesh.FACEMESH_TESSELATION,
             mp_drawing_styles.get_default_face_mesh_tesselation_style()),
            (mp_face_mesh.FACEMESH_CONTOURS,
             mp_drawing_styles.get_default_face_mesh_contours_style()),
        )
        for connections, style in passes:
            mp_drawing.draw_landmarks(
                image=image,
                landmark_list=landmarks,
                connections=connections,
                landmark_drawing_spec=None,
                connection_drawing_spec=style)

    def mapLandMarks(self, landmarks: landmark_pb2.NormalizedLandmarkList) -> List[MeshPoint]:
        """Create one MeshPoint per landmark and return them centered."""
        points = [MeshPoint().mapLandMark(self, landmark) for landmark in landmarks.landmark]
        return self.centerPoints(points)

    def centerPoints(self,points:List[MeshPoint]=None) -> List[MeshPoint]:
        """Shift ``points`` so the middle of their bounding box is the origin.

        Uses ``self.points`` when ``points`` is None.  Each point gets a new
        ``position.values`` list.  The mins, mids and maxs are printed.
        """
        if points is None:
            points = self.points

        mins = [+np.inf] * 3
        maxs = [-np.inf] * 3
        for point in points:
            for dimension, value in enumerate( point.position.values ):
                mins[dimension] = min(mins[dimension], value)
                maxs[dimension] = max(maxs[dimension], value)

        mids = [(min_val + max_val) / 2 for min_val, max_val in zip(mins, maxs)]
        for point in points:
            point.position.values = [(val - mid) for val, mid in zip(point.position.values, mids)]

        print( f'mins: {mins}' )
        print( f'mids: {mids}' )
        print( f'maxs: {maxs}' )
        return points

    def createDepthMapping(self,depthMapping:DepthMapping=None) -> DepthMapping:
        """Return a mapping that has tracked the z value of every point."""
        if depthMapping is None:
            depthMapping = DepthMapping()
        for point in self.points:
            # Track the z value itself; indexing the list does not depend on
            # how Point3 exposes its z accessor.
            depthMapping.track(point.position.values[2])
        return depthMapping.update()

    def drawGrayMesh(self, source:str=DepthMap.MEDIA_PIPE, invert:bool=False):
        """Render the mesh quads as flat gray polygons shaded by depth.

        The depths stored under ``source`` are normalised to 0..1 over all
        points; each quad is filled with the average gray level of its
        corners, inverted when ``invert`` is true.  Returns a numpy array.
        """
        image = Image.new("RGB", (self.width, self.height), (88,13,33))
        draw = ImageDraw.Draw(image)

        depths = [point.depthMap.values[source] for point in self.points]
        minZ = min(depths, default=np.inf)
        maxZ = max(depths, default=-np.inf)
        difZ = maxZ - minZ
        if 0 == difZ:
            difZ = 1 # flat depth: avoid dividing by zero
        depths = [(depth - minZ) / difZ for depth in depths]

        for quad in QUADS:
            # QUADS holds 1-based point indices.
            points = [tuple(self.points[index-1].pixelCoordinate.values) for index in quad]
            levels = [int(255 * depths[index-1]) for index in quad]
            color = int(np.average(levels))
            if invert:
                color = 255 - color
            draw.polygon(points, fill=tuple(3*[color]))
        return np.asarray(image)

    # the obj is based on the current weightMap
    def toObj(self, name:str='sweet', hf_hack:bool=HF_HACK):
        """Return ``(obj_lines, mtl_text)`` for the current weight map.

        Every point's z is recomputed with ``weighDepth`` and the points are
        re-centered, so calling this changes the point positions.  With
        ``hf_hack`` the mtllib line is written commented out.
        """
        banner = '-----------------------------------------------------------------------------'
        rule = '##################################################################'
        print( banner )

        mtl = f'newmtl {name}Material\nmap_Kd {name}.png\n'
        c = '#' if hf_hack else ''
        obj = [
            f'o {name}Mesh',
            f'{c}mtllib {name}.mtl',
            rule,
            '# to bring into blender with uvs:',
            f'# put the following 2 lines into {name}.mtl uncommented',
            f'#newmtl {name}Material',
            f'#map_Kd {name}.png',
            '# remove lines from this file starting with "f "',
            '# uncomment the lines that start with "#f "',
            rule,
        ]

        for key, depthMapping in self.weightMap.values.items():
            depthMapping.update()
            print( f'{name}.{key} -> {depthMapping.to_dict()}' )

        for point in self.points:
            point.weighDepth(self.weightMap)
        self.centerPoints()

        for point in self.points:
            point.toObj(obj, hf_hack)

        obj.append( f'usemtl {name}Material' )
        # Face normals are numbered from 1, in QUADS order.
        for index, quad in enumerate(QUADS, start=1):
            face = MeshFace(quad)
            face.calculateNormal(self.points)
            face.toObj(obj, index, hf_hack)

        obj.append( rule )
        obj.append( '# EOF' )
        obj.append( rule )
        print( banner )
        return obj,mtl

    def to_dict(self):
        """Return size, ratio, weight map and points as plain dicts."""
        return {
            'width' : self.width,
            'height' : self.height,
            'ratio' : self.ratio,
            'weightMap' : {key: value.to_dict() for key, value in self.weightMap.values.items()},
            'points' : [point.to_dict() for point in self.points]
        }

    # should be called after demoSetup and detect
    def singleSourceMesh(self,name:str, hf_hack:bool=HF_HACK):
        """Return ``(obj, mtl)`` built from the depth source ``name`` alone.

        The weights are saved, switched so that only ``name`` counts, and
        restored afterwards even if the export fails.
        """
        before = self.weightMap.saveWeights() # push
        try:
            self.weightMap.totally(name)
            return self.toObj(name, hf_hack)
        finally:
            self.weightMap.loadWeights( before ) # pop

    # should be called after demoSetup and detect
    def meshmerizing(self,hf_hack:bool=HF_HACK):
        """Run every depth source and build all meshes.

        Returns a dict of ``name -> (obj, mtl)`` holding the combined mesh,
        one mesh per depth source and one for the MediaPipe depth.
        """
        for depthSource in self.depthSources:
            # A source without a registered mapping gets None; the depth
            # source then creates a new mapping itself.
            depthSource.mapDepth(self, self.weightMap.values.get(depthSource.name))

        self.meshes = {MediaMesh.COMBINED: self.toObj(MediaMesh.COMBINED, hf_hack)}
        for source in self.depthSources:
            self.meshes[ source.name ] = self.singleSourceMesh(source.name, hf_hack)
        self.meshes[DepthMap.MEDIA_PIPE] = self.singleSourceMesh(DepthMap.MEDIA_PIPE, hf_hack)
        return self.meshes

    def demoSetup(self) -> 'MediaMesh':
        """Install the Zoe and Midas depth sources with the demo's weights."""
        self.depthSources = [ ZoeDepthSource(), MidasDepthSource() ]
        for depthSource in self.depthSources:
            self.weightMap.set( depthSource.name, depthSource.createDepthMapping() )

        # observationally
        zoe = self.weightMap.values[ ZoeDepthSource.NAME ]
        midas = self.weightMap.values[ MidasDepthSource.NAME ]
        zoe.toHi = 1.77
        midas.toHi = 2.55
        zoe.weight = 1.00
        midas.weight = 0.22
        return self

    def main(self):
        """Command-line entry: build meshes for the image named in sys.argv[1].

        Writes ``<name>.obj`` and ``<name>.mtl`` for every mesh, the preview
        images ``mesh.png``, ``mpg.png`` and ``<source>.png``, and
        ``mesh.json``, all into the current directory.

        Raises:
            Exception: on wrong usage or when the image cannot be read.
        """
        if not 2 == len(sys.argv):
            raise Exception( 'usage: MediaMesh.py <image filename>' )

        image = cv2.imread( sys.argv[1] )
        if image is None:
            # cv2.imread signals an unreadable file by returning None.
            raise Exception( f'could not read image {sys.argv[1]}' )

        mediaMesh = MediaMesh().demoSetup()
        mediaMesh.detect( image )

        for name, (obj, mtl) in mediaMesh.meshmerizing().items():
            with open(f"{name}.obj", "w") as file:
                file.write( '\n'.join(obj) )
            with open(f"{name}.mtl", "w") as file:
                file.write( mtl )

        cv2.imwrite( 'mesh.png', mediaMesh.annotated )
        cv2.imwrite( 'mpg.png', mediaMesh.gray )
        for source in mediaMesh.depthSources:
            cv2.imwrite( f'{source.name}.png', source.gray )

        with open("mesh.json", "w") as file:
            json.dump(mediaMesh.to_dict(), file, indent=4)
# Script entry point: main() is an instance method, so an instance is created
# only to call it.
if __name__ == "__main__":
    cli = MediaMesh()
    cli.main()
| # EOF | |
| ############################################################################# | |