Commit 6caa466 · Parent(s): 027ca38

add initial files
Files changed:

- README.md (+15, -13)
- app.py (+557, -0)
- requirements.txt (+9, -0)
- train_heightmap.py (+114, -0)
- train_terrain.py (+118, -0)
- util/__pycache__/dataset.cpython-311.pyc (binary)
- util/__pycache__/unet.cpython-311.pyc (binary)
- util/dataset.py (+44, -0)
- util/unet.py (+69, -0)
README.md
CHANGED

@@ -1,13 +1,15 @@

The 13 removed lines are blank in this view. The 15 added lines:

Terrain Reconstruction

```bash
pyenv shell 3.11
python3 -m venv env
source env/bin/activate

pip install -r requirements.txt
mkdir -p models/terrain

python3 train_heightmap.py
python3 train_terrain.py
```

CUDA/MPS advised.
app.py
ADDED

@@ -0,0 +1,557 @@
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the 3D projection)
import matplotlib.pyplot as plt
import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import os
import matplotlib
import tempfile
import trimesh

# Set the matplotlib backend to 'Agg' for non-interactive plotting in a server environment.
matplotlib.use('Agg')


# Define the DoubleConv and UNet classes exactly as in the training notebook.


class DoubleConv(nn.Module):
    """(convolution => [BN] => ReLU) * 2"""

    def __init__(self, in_channels, out_channels):
        super(DoubleConv, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, features=[64, 128, 256, 512]):
        super(UNet, self).__init__()
        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder (downsampling path)
        for feature in features:
            self.encoder.append(DoubleConv(in_channels, feature))
            in_channels = feature

        # Bottleneck
        self.bottleneck = DoubleConv(features[-1], features[-1] * 2)

        # Decoder (upsampling path)
        for feature in reversed(features):
            self.decoder.append(nn.ConvTranspose2d(
                feature * 2, feature, kernel_size=2, stride=2))
            self.decoder.append(DoubleConv(feature * 2, feature))

        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)

    def forward(self, x):
        skip_connections = []

        # Encode
        for layer in self.encoder:
            x = layer(x)
            skip_connections.append(x)
            x = self.pool(x)

        # Bottleneck
        x = self.bottleneck(x)
        skip_connections = skip_connections[::-1]

        # Decode
        for idx in range(0, len(self.decoder), 2):
            x = self.decoder[idx](x)  # upsampling conv
            skip_connection = skip_connections[idx // 2]
            # Resize if necessary
            if x.shape != skip_connection.shape:
                x = F.interpolate(
                    x, size=skip_connection.shape[2:], mode='bilinear', align_corners=True)
            # Concatenate the skip connection
            concat_skip = torch.cat((skip_connection, x), dim=1)
            x = self.decoder[idx + 1](concat_skip)  # DoubleConv

        return self.final_conv(x)


def generate_mesh_from_images(heightmap_img, texture_img, max_height=100.0):
    """
    Convert a heightmap (PIL.Image) and a texture map (PIL.Image) into 3D mesh data.

    Args:
        heightmap_img (PIL.Image): Grayscale image for the heightmap.
        texture_img (PIL.Image): Texture image (color) to map with UV coords.
        max_height (float): Maximum elevation represented in the mesh.

    Returns:
        dict: {
            'vertices': list of (x, y, z) tuples,
            'uvs': list of (u, v) tuples,
            'faces': list of (v0, v1, v2) tuples (index-based),
            'dimensions': (width, height)
        }
    """
    # Ensure both images are the same size
    if heightmap_img.size != texture_img.size:
        raise ValueError("Heightmap and texture must be the same dimensions.")

    width, height = heightmap_img.size

    # Convert the heightmap to a NumPy array and normalize
    height_data = np.asarray(heightmap_img.convert('L'),
                             dtype=np.float32) / 255.0
    height_data *= max_height

    vertices = []
    uvs = []
    faces = []

    for y in range(height):
        for x in range(width):
            z = height_data[y][x]
            vertices.append((x, z, y))  # world position
            uvs.append((x / (width - 1), y / (height - 1)))  # UV coords

    for y in range(height - 1):
        for x in range(width - 1):
            i = y * width + x
            i_right = i + 1
            i_bottom = i + width
            i_diag = i_bottom + 1

            # First triangle
            faces.append((i, i_bottom, i_right))
            # Second triangle
            faces.append((i_right, i_bottom, i_diag))

    return {
        'vertices': vertices,
        'uvs': uvs,
        'faces': faces,
        'dimensions': (width, height)
    }


def mesh_to_obj_string(mesh_data):
    vertices = mesh_data['vertices']
    uvs = mesh_data['uvs']
    faces = mesh_data['faces']

    lines = []

    # Write vertices
    for v in vertices:
        lines.append(f"v {v[0]:.6f} {v[1]:.6f} {v[2]:.6f}")

    # Write UVs (texture coordinates); flip V for the OBJ format
    for uv in uvs:
        lines.append(f"vt {uv[0]:.6f} {1.0 - uv[1]:.6f}")

    # Write faces (referencing vertex and UV indices, 1-based)
    # OBJ face format: f v1/vt1 v2/vt2 v3/vt3
    for f in faces:
        v1, v2, v3 = f
        lines.append(f"f {v1+1}/{v1+1} {v2+1}/{v2+1} {v3+1}/{v3+1}")

    # Join into OBJ text
    return '\n'.join(lines)


def mesh_to_obj_file(mesh_data, texture_img):
    obj_str = mesh_to_obj_string(mesh_data)

    # Create a temporary folder to hold all the files
    temp_dir = tempfile.mkdtemp()

    obj_path = os.path.join(temp_dir, "model.obj")
    mtl_path = os.path.join(temp_dir, "model.mtl")
    texture_path = os.path.join(temp_dir, "texture.png")

    # Save the texture image
    texture_img.save(texture_path)

    # Write the MTL file
    with open(mtl_path, 'w') as f:
        f.write(
            "newmtl material0\n"
            "Ka 1.000 1.000 1.000\n"
            "Kd 1.000 1.000 1.000\n"
            "Ks 0.000 0.000 0.000\n"
            "d 1.0\n"
            "illum 2\n"
            "map_Kd texture.png\n"
        )

    # Write the OBJ file with a reference to the MTL
    with open(obj_path, 'w') as f:
        f.write("mtllib model.mtl\n")
        f.write("usemtl material0\n")
        f.write(obj_str)

    # Only return the OBJ path; Gradio's Model3D finds the .mtl and texture in the same folder
    return obj_path


def render_3d_model(heightmap_img, texture_img):
    mesh = generate_mesh_from_images(heightmap_img, texture_img)
    obj_file_path = mesh_to_obj_file(mesh, texture_img)
    return obj_file_path  # path to the .obj file with full material and texture


def render_3d_model_glb(heightmap_img, texture_img, max_height=70.0):
    mesh_data = generate_mesh_from_images(
        heightmap_img, texture_img, max_height)

    # Flip the texture vertically so it matches the UV orientation
    texture_img = texture_img.transpose(Image.FLIP_TOP_BOTTOM)

    vertices = np.array(mesh_data['vertices'], dtype=np.float32)
    faces = np.array(mesh_data['faces'], dtype=np.int64)
    uvs = np.array(mesh_data['uvs'], dtype=np.float32)

    # Create the Trimesh object and assign UV coordinates
    mesh = trimesh.Trimesh(vertices=vertices, faces=faces, process=False)
    mesh.visual = trimesh.visual.TextureVisuals(uv=uvs)

    # Apply the texture via the material (PIL Image object)
    mesh.visual.material.image = texture_img

    # Build the scene
    scene = trimesh.Scene()
    scene.add_geometry(mesh)

    # Write the GLB
    glb_path = os.path.join(tempfile.mkdtemp(), "terrain.glb")
    scene.export(glb_path, file_type='glb')

    return glb_path


# --- Model and presets loading ---
script_dir = os.path.dirname(os.path.abspath(__file__))
heightmap_model_path = os.path.join(
    script_dir, './models/terrain/turbo_heightmap_unet_model.pth')
terrain_model_path = os.path.join(
    script_dir, './models/terrain/turbo_terrain_unet_model.pth')
presets_folder_path = os.path.join(script_dir, './presets')

# Prefer MPS, then CUDA, then fall back to CPU
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Initialize the models with the correct architectures
heightmap_gen_model = UNet(in_channels=3, out_channels=1, features=[
                           64, 128, 256, 512, 1024]).to(device)
terrain_gen_model = UNet(in_channels=3, out_channels=3).to(device)

try:
    print(f"Attempting to load heightmap model from: {heightmap_model_path}")
    heightmap_gen_model.load_state_dict(torch.load(
        heightmap_model_path, map_location=device))
    print(f"Attempting to load terrain model from: {terrain_model_path}")
    terrain_gen_model.load_state_dict(torch.load(
        terrain_model_path, map_location=device))
    print("--- Models loaded successfully. ---")
except Exception as e:
    raise SystemExit(f"FATAL: Could not load models. Error: {e}")

# Load the preset image paths
example_paths = []
if os.path.exists(presets_folder_path):
    for filename in os.listdir(presets_folder_path):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            example_paths.append(os.path.join(presets_folder_path, filename))
    print(f"Found {len(example_paths)} preset images in {presets_folder_path}")
else:
    print(f"WARNING: Presets folder not found at {presets_folder_path}. "
          "No examples will be loaded.")


# Define the image transformation pipeline
transform_pipeline = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])


def generate_3d_plot(heightmap_np, terrain_np, elev, azim):
    """
    Generates a 3D surface plot from a heightmap and a terrain color map.
    """
    heightmap_gray = heightmap_np.squeeze()

    # Prepare for 3D plotting
    rows, cols = heightmap_gray.shape
    X, Y = np.meshgrid(np.arange(cols), np.arange(rows))
    Z = heightmap_gray.astype(np.float32)

    # Normalize the terrain colors for facecolors
    normal_map_facecolors = terrain_np / 255.0

    # Create the 3D plot
    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_subplot(111, projection='3d')
    # [X, Y, Z] ratio; make the Z axis 30% the scale of X/Y
    ax.set_box_aspect([1, 1, 0.3])

    # Plot the surface with a stride for performance
    ax.plot_surface(X, Y, Z, facecolors=normal_map_facecolors,
                    rstride=2, cstride=2, linewidth=0, antialiased=False)

    # Set the view and labels using the slider values
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z (Elevation)')
    ax.set_title("3D Rendered Terrain")

    plt.tight_layout()
    return fig


def gaussian_blur(tensor, kernel_size=5, sigma=1.0):
    # Create a 1D Gaussian kernel
    def get_gaussian_kernel1d(k, s):
        x = torch.arange(-k//2 + 1., k//2 + 1.)
        kernel = torch.exp(-x**2 / (2*s**2))
        kernel /= kernel.sum()
        return kernel

    kernel_1d = get_gaussian_kernel1d(kernel_size, sigma).to(tensor.device)
    kernel_2d = torch.outer(kernel_1d, kernel_1d)

    # Expand to match the conv2d weight shape: [out_channels, in_channels, H, W]
    c = tensor.shape[1]
    weight = kernel_2d.expand(c, 1, kernel_size, kernel_size)

    # Apply padding so the spatial dims are preserved
    padding = kernel_size // 2
    blurred = F.conv2d(tensor, weight, padding=padding, groups=c)
    return blurred


def predict(input_image_pil, elevation, azimuth):
    """
    Takes a single input image and view angles, generates the heightmap
    and terrain, and creates a 3D plot.
    """
    if input_image_pil is None:
        # Return blank outputs if no image is provided
        # (four values, matching the four output components)
        blank_image = Image.new('RGB', (256, 256), 'white')
        blank_plot = plt.figure()
        plt.plot([])
        return blank_image, blank_image, blank_plot, None

    # Ensure the image is in RGB format
    input_image_pil = input_image_pil.convert("RGB")

    input_tensor = transform_pipeline(input_image_pil).unsqueeze(0).to(device)

    with torch.no_grad():
        heightmap_gen_model.eval()
        terrain_gen_model.eval()
        generated_heightmap_tensor = heightmap_gen_model(input_tensor)
        # Apply a Gaussian blur to the heightmap tensor
        generated_heightmap_tensor = gaussian_blur(
            generated_heightmap_tensor, kernel_size=5, sigma=1.2)

        generated_terrain_tensor = terrain_gen_model(input_tensor)
        generated_terrain_tensor = gaussian_blur(
            generated_terrain_tensor, kernel_size=5, sigma=1.1)

    # Post-process for the 2D image outputs
    heightmap_np = generated_heightmap_tensor.squeeze(
        0).cpu().permute(1, 2, 0).numpy()
    terrain_np = generated_terrain_tensor.squeeze(
        0).cpu().permute(1, 2, 0).numpy()

    heightmap_np_viz = (heightmap_np - heightmap_np.min()) / \
        (heightmap_np.max() - heightmap_np.min())
    terrain_np_viz = (terrain_np - terrain_np.min()) / \
        (terrain_np.max() - terrain_np.min())

    heightmap_image = Image.fromarray(
        (heightmap_np_viz * 255).astype(np.uint8).squeeze(), 'L')
    terrain_image = Image.fromarray((terrain_np_viz * 255).astype(np.uint8))

    # Generate the 3D plot using the numpy arrays and slider values
    plot_3d = generate_3d_plot(
        heightmap_np_viz, (terrain_np_viz * 255).astype(np.uint8), elevation, azimuth)

    # Close the figure to free up memory
    plt.close(plot_3d)

    object_3d = render_3d_model_glb(heightmap_image, terrain_image)

    return heightmap_image, terrain_image, plot_3d, object_3d


# Create the Gradio interface
with gr.Blocks() as iface:
    gr.Markdown("# 2D and 3D Terrain Generator")
    gr.Markdown("Upload, draw, or choose a preset segmentation map to generate a 2D heightmap, a 2D terrain image, and a 3D rendered terrain.")

    with gr.Row():
        with gr.Column():
            with gr.Tabs():
                with gr.Tab("Upload & Presets"):
                    input_img_upload = gr.Image(
                        type="pil", label="Input Segmentation Map")
                    if example_paths:
                        gr.Examples(
                            examples=example_paths,
                            inputs=input_img_upload,
                            label="Preset Segmentation Maps"
                        )
                with gr.Tab("Draw"):
                    terrain_colors = [
                        "#118DD7",  # Water 💧
                        "#E1E39B",  # Grassland 🌾
                        "#7FAD7B",  # Forest 🌲
                        "#B97A57",  # Hills ⛰️
                        "#E6C8B5",  # Desert 🏜️
                        "#969696",  # Mountain 🏔️
                        "#C1BEAF"   # Tundra ❄️
                    ]
                    sketchpad = gr.ImageEditor(
                        type="pil", label="Draw Segmentation Map", height=512, width=512, brush=gr.Brush(colors=terrain_colors))

            elevation_slider = gr.Slider(
                minimum=0, maximum=90, value=30, step=1, label="Elevation Angle")
            azimuth_slider = gr.Slider(
                minimum=0, maximum=360, value=45, step=1, label="Azimuth Angle")
            btn = gr.Button("Generate")

        with gr.Column():
            output_heightmap = gr.Image(
                type="pil", label="Generated Heightmap (2D)")
            output_terrain = gr.Image(
                type="pil", label="Generated Terrain (2D)")
            output_plot = gr.Plot(label="Generated Terrain (3D)")
            output_3d_viewer = gr.Model3D(
                label="Generated 3D Object (not particularly accurate)")

    # Wrapper function to decide which input to use
    def wrapper_predict(uploaded_img, drawn_img_dict, elevation, azimuth):
        image_to_use = None
        # Check if the user has drawn something meaningful
        if drawn_img_dict and drawn_img_dict["composite"] is not None:
            image_to_use = drawn_img_dict["composite"]
        # Otherwise, fall back to the uploaded image
        elif uploaded_img is not None:
            image_to_use = uploaded_img

        return predict(image_to_use, elevation, azimuth)

    # The 'Generate' button triggers the prediction
    btn.click(
        fn=wrapper_predict,
        inputs=[input_img_upload, sketchpad, elevation_slider, azimuth_slider],
        outputs=[output_heightmap, output_terrain,
                 output_plot, output_3d_viewer]
    )

    # Allow the sliders to update the outputs when released.
    # predict returns four values, so all four outputs are listed here as well.
    elevation_slider.release(
        fn=wrapper_predict,
        inputs=[input_img_upload, sketchpad, elevation_slider, azimuth_slider],
        outputs=[output_heightmap, output_terrain,
                 output_plot, output_3d_viewer]
    )
    azimuth_slider.release(
        fn=wrapper_predict,
        inputs=[input_img_upload, sketchpad, elevation_slider, azimuth_slider],
        outputs=[output_heightmap, output_terrain,
                 output_plot, output_3d_viewer]
    )

# Launch the app
if __name__ == "__main__":
    iface.launch(share=True)
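For orientation, here is a minimal, self-contained sketch of the mesh pipeline above on a tiny 3×3 input. The module name `terrain_mesh` is hypothetical; as written, importing app.py directly would also trigger model loading, so the two mesh helpers would need to be factored out first:

```python
from PIL import Image

# Hypothetical module holding app.py's mesh helpers.
from terrain_mesh import generate_mesh_from_images, mesh_to_obj_string

# Tiny 3x3 synthetic inputs: a gradient heightmap and a flat green texture.
heightmap = Image.new('L', (3, 3))
heightmap.putdata([0, 64, 128, 64, 128, 192, 128, 192, 255])
texture = Image.new('RGB', (3, 3), (120, 160, 90))

mesh = generate_mesh_from_images(heightmap, texture, max_height=10.0)
# A 3x3 grid yields 9 vertices and 2x2 quads split into 8 triangles.
assert len(mesh['vertices']) == 9 and len(mesh['faces']) == 8

obj_text = mesh_to_obj_string(mesh)
print(obj_text.splitlines()[0])  # first vertex line: "v 0.000000 0.000000 0.000000"
```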
requirements.txt
ADDED

@@ -0,0 +1,9 @@

torch
torchvision
matplotlib
trimesh
pygltflib
numpy
seaborn
gradio
pillow
train_heightmap.py
ADDED

@@ -0,0 +1,114 @@
import torch
import torch.optim as optim
import torch.nn as nn
from util.unet import UNet
import torchvision.transforms as transforms
import util.dataset as ds
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torchvision.models as models
import os

# Change this to your own dataset path.
# Dataset: https://www.kaggle.com/datasets/tpapp157/earth-terrain-height-and-segmentation-map-images
dataset_path = "../../Other/cosmos/data/terrain_reconstruction/_dataset/"


transform_pipeline = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

dataset = ds.TerrainDataset(dataset_path, transform=transform_pipeline)

# 80% train, 20% test split
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
dataset_train, dataset_test = random_split(dataset, [train_size, test_size])

device = torch.device("mps" if torch.backends.mps.is_available(
) else "cuda" if torch.cuda.is_available() else "cpu")

# Initialize the dataloaders
numworkers = 0
batchsize = 8
train_loader = DataLoader(
    dataset_train, batch_size=batchsize, shuffle=True, num_workers=numworkers)
test_loader = DataLoader(dataset_test, batch_size=batchsize,
                         shuffle=False, num_workers=numworkers)


class PerceptualLoss(nn.Module):
    def __init__(self, feature_layer=9):
        super(PerceptualLoss, self).__init__()
        vgg = models.vgg16(
            weights=models.VGG16_Weights.DEFAULT).features[:feature_layer].eval()
        for param in vgg.parameters():
            param.requires_grad = False
        self.vgg = vgg.to(device)
        self.transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])

    def forward(self, pred, target):
        pred = self.transform(pred)
        target = self.transform(target)
        return nn.functional.mse_loss(self.vgg(pred), self.vgg(target))


def total_variation_loss(x):
    return torch.mean(torch.abs(x[:, :, :, :-1] - x[:, :, :, 1:])) + \
        torch.mean(torch.abs(x[:, :, :-1, :] - x[:, :, 1:, :]))


unet_model = UNet(in_channels=3, out_channels=1, use_sigmoid=False, features=[
                  64, 128, 256, 512, 1024]).to(device)

mse_loss = nn.MSELoss()
perceptual_loss = PerceptualLoss().to(device)
perceptual_loss_scaling_factor = 0.1
optimizer = optim.Adam(unet_model.parameters(), lr=0.001)


# To resume training, load a previous checkpoint:
# unet_model.load_state_dict(torch.load('./models/terrain/heightmap_unet_model.pth'))
num_epochs = 5
for epoch in range(num_epochs):
    unet_model.train()
    running_loss = 0.0

    for i, (height, terrain, segmentation) in enumerate(train_loader):
        images = segmentation.to(device).float()
        target_images = height.to(device).float()

        # Forward pass
        outputs = unet_model(images)

        # Convert [B, 1, H, W] -> [B, 3, H, W] for the VGG-based perceptual loss
        outputs_rgb = outputs.repeat(1, 3, 1, 1)
        targets_rgb = target_images.repeat(1, 3, 1, 1)

        # The division by 65535 treats the heightmaps as 16-bit,
        # bringing values into [0, 1] before the losses are computed.
        tv_weight = 1e-6
        loss = (mse_loss(outputs/65535, target_images/65535)
                + perceptual_loss_scaling_factor *
                perceptual_loss(outputs_rgb/65535, targets_rgb/65535)
                + tv_weight * total_variation_loss(outputs/65535))
        running_loss += loss.item()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'Step [{i + 1}/{len(train_loader)}], '
                  f'Loss: {loss.item():.4f}')

# Mirrors the `mkdir -p models/terrain` step in the README
os.makedirs('./models/terrain', exist_ok=True)
torch.save(unet_model.state_dict(),
           './models/terrain/turbo_heightmap_unet_model.pth')
print("Model saved to './models/terrain/turbo_heightmap_unet_model.pth'")
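As a quick worked check of the total variation term above, a self-contained sketch with the same definition on tiny tensors:

```python
import torch

def total_variation_loss(x):
    # mean absolute difference between horizontal and vertical neighbours
    return torch.mean(torch.abs(x[:, :, :, :-1] - x[:, :, :, 1:])) + \
        torch.mean(torch.abs(x[:, :, :-1, :] - x[:, :, 1:, :]))

flat = torch.ones(1, 1, 4, 4)                  # constant image -> zero TV
ramp = torch.arange(16.).reshape(1, 1, 4, 4)   # row-major ramp 0..15
print(total_variation_loss(flat).item())  # 0.0
print(total_variation_loss(ramp).item())  # 1.0 (horizontal) + 4.0 (vertical) = 5.0
```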
train_terrain.py
ADDED

@@ -0,0 +1,118 @@
import torch
import torch.optim as optim
import torch.nn as nn
from util.unet import UNet
import torchvision.transforms as transforms
import util.dataset as ds
from torch.utils.data import random_split
from torch.utils.data import DataLoader
import torchvision.models as models
import os

# Change this to your own dataset path.
dataset_path = "../../Other/cosmos/data/terrain_reconstruction/_dataset/"


transform_pipeline = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

dataset = ds.TerrainDataset(dataset_path, transform=transform_pipeline)

# 80% train, 20% test split
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
dataset_train, dataset_test = random_split(dataset, [train_size, test_size])

device = torch.device("mps" if torch.backends.mps.is_available(
) else "cuda" if torch.cuda.is_available() else "cpu")

# Initialize the dataloaders
numworkers = 0
batchsize = 8
train_loader = DataLoader(
    dataset_train, batch_size=batchsize, shuffle=True, num_workers=numworkers)
test_loader = DataLoader(dataset_test, batch_size=batchsize,
                         shuffle=False, num_workers=numworkers)


class PerceptualLoss(nn.Module):
    def __init__(self, feature_layer=9):
        super(PerceptualLoss, self).__init__()
        vgg = models.vgg16(
            weights=models.VGG16_Weights.DEFAULT).features[:feature_layer].eval()
        for param in vgg.parameters():
            param.requires_grad = False
        self.vgg = vgg.to(device)
        self.transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225])

    def forward(self, pred, target):
        pred = self.transform(pred)
        target = self.transform(target)
        return nn.functional.mse_loss(self.vgg(pred), self.vgg(target))


def total_variation_loss(x):
    return torch.mean(torch.abs(x[:, :, :, :-1] - x[:, :, :, 1:])) + \
        torch.mean(torch.abs(x[:, :, :-1, :] - x[:, :, 1:, :]))


# Initialize the UNet model
unet_model = UNet(in_channels=3, out_channels=3).to(device)
mse_loss = nn.MSELoss()
perceptual_loss = PerceptualLoss().to(device)
perceptual_loss_scaling_factor = 0.1  # adjust this factor as needed
optimizer = optim.Adam(unet_model.parameters(), lr=0.001)


train_previous = False
if train_previous:
    unet_model.load_state_dict(torch.load(
        './models/terrain/turbo_terrain_unet_model.pth'))
    print("Loaded previous model state from './models/terrain/turbo_terrain_unet_model.pth'")

num_epochs = 5
for epoch in range(num_epochs):
    unet_model.train()
    running_loss = 0.0

    for i, (height, terrain, segmentation) in enumerate(train_loader):
        # Rescale the targets from [0, 1] (after ToTensor) to [-1, 1]
        terrain = (terrain * 2) - 1
        images = segmentation.to(device)
        target_images = terrain.to(device)

        # Forward pass
        outputs = unet_model(images)
        loss = mse_loss(outputs, target_images) + perceptual_loss_scaling_factor * \
            perceptual_loss(outputs, target_images)
        running_loss += loss.item()

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if (i + 1) % 10 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], '
                  f'Step [{i + 1}/{len(train_loader)}], '
                  f'Loss: {loss.item():.4f}')


# Mirrors the `mkdir -p models/terrain` step in the README
os.makedirs('./models/terrain', exist_ok=True)
torch.save(unet_model.state_dict(),
           './models/terrain/turbo_terrain_unet_model.pth')
print("Model saved to './models/terrain/turbo_terrain_unet_model.pth'")
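One consequence of the [-1, 1] rescaling above: any downstream consumer has to undo that mapping before treating outputs as image data. A minimal sketch, assuming the model's raw output is interpreted on the same scale as the targets:

```python
import torch

def to_unit_range(x: torch.Tensor) -> torch.Tensor:
    # invert the (t * 2) - 1 rescaling used for the training targets
    return (x.clamp(-1.0, 1.0) + 1.0) / 2.0

fake_output = torch.tanh(torch.randn(1, 3, 128, 128))  # stand-in prediction
img = to_unit_range(fake_output)
assert 0.0 <= img.min() and img.max() <= 1.0
```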
util/__pycache__/dataset.cpython-311.pyc
ADDED

Binary file (3.4 kB).

util/__pycache__/unet.cpython-311.pyc
ADDED

Binary file (4.69 kB).
util/dataset.py
ADDED

@@ -0,0 +1,44 @@
from torch.utils.data import Dataset
import os
from PIL import Image


class TerrainDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform

        # Sort to ensure alignment between the _h, _t, and _i files
        self.height_paths = sorted(
            [os.path.join(data_dir, f)
             for f in os.listdir(data_dir) if '_h' in f]
        )
        self.terrain_paths = sorted(
            [os.path.join(data_dir, f)
             for f in os.listdir(data_dir) if '_t' in f]
        )
        # '_i' also matches the '_i2' variant, so a single check suffices
        self.segmentation_paths = sorted(
            [os.path.join(data_dir, f)
             for f in os.listdir(data_dir) if '_i' in f]
        )

        assert len(self.height_paths) == len(self.terrain_paths) == len(self.segmentation_paths), \
            "Mismatch in dataset triplet lengths"

        print(f"Found {len(self.height_paths)} triplets in {data_dir}")

    def __len__(self):
        return len(self.height_paths)

    def __getitem__(self, idx):
        # Load the heightmap, terrain, and segmentation images
        paths = [self.height_paths[idx], self.terrain_paths[idx],
                 self.segmentation_paths[idx]]
        images = []
        for path in paths:
            image = Image.open(path)
            if self.transform:
                image = self.transform(image)
            images.append(image)
        return tuple(images)  # (heightmap, terrain, segmentation)
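A usage sketch for TerrainDataset, mirroring how the training scripts consume it (the dataset path here is a placeholder):

```python
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import util.dataset as ds

tf = transforms.Compose([transforms.Resize((128, 128)), transforms.ToTensor()])
dataset = ds.TerrainDataset("path/to/_dataset", transform=tf)  # placeholder path

loader = DataLoader(dataset, batch_size=8, shuffle=True)
height, terrain, segmentation = next(iter(loader))  # one aligned triplet batch
print(height.shape, terrain.shape, segmentation.shape)
```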
util/unet.py
ADDED

@@ -0,0 +1,69 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class DoubleConv(nn.Module):
    def __init__(self, out_channels):
        super(DoubleConv, self).__init__()
        # LazyConv2d infers in_channels on the first forward pass
        self.conv = nn.Sequential(
            nn.LazyConv2d(out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.LazyConv2d(out_channels, kernel_size=3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.conv(x)


class UNet(nn.Module):
    # in_channels is kept for API symmetry with app.py's UNet;
    # the lazy convolutions infer it automatically.
    def __init__(self, in_channels=3, out_channels=1, features=[64, 128, 256, 512], use_sigmoid=True):
        super(UNet, self).__init__()
        self.use_sigmoid = use_sigmoid
        self.encoder = nn.ModuleList()
        self.decoder = nn.ModuleList()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Encoder
        for feature in features:
            self.encoder.append(DoubleConv(feature))

        # Bottleneck
        self.bottleneck = DoubleConv(features[-1] * 2)

        # Decoder
        for feature in reversed(features):
            self.decoder.append(nn.ConvTranspose2d(
                feature * 2, feature, kernel_size=2, stride=2))
            self.decoder.append(DoubleConv(feature))  # after concatenation

        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)
        self.output_activation = nn.Sigmoid() if out_channels == 1 else nn.Identity()

    def forward(self, x):
        skip_connections = []

        for layer in self.encoder:
            x = layer(x)
            skip_connections.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)
        skip_connections = skip_connections[::-1]

        for idx in range(0, len(self.decoder), 2):
            x = self.decoder[idx](x)  # upsample
            skip_connection = skip_connections[idx // 2]
            if x.shape != skip_connection.shape:
                x = F.interpolate(
                    x, size=skip_connection.shape[2:], mode='bilinear', align_corners=True)
            x = torch.cat((skip_connection, x), dim=1)  # concat
            x = self.decoder[idx + 1](x)  # double conv

        if self.use_sigmoid:
            return self.output_activation(self.final_conv(x))
        return self.final_conv(x)
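Finally, a shape smoke test for this UNet; since the lazy convolutions materialize their weights on the first forward pass, one dummy batch is enough to fully initialize the model:

```python
import torch
from util.unet import UNet

model = UNet(in_channels=3, out_channels=1, use_sigmoid=True)
x = torch.randn(1, 3, 256, 256)
with torch.no_grad():
    y = model(x)  # first call initializes every LazyConv2d
print(y.shape)    # torch.Size([1, 1, 256, 256])
```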