Upload folder using huggingface_hub
Browse files- .gitattributes +3 -0
- LICENSE +21 -0
- README.md +342 -3
- demo/1.eng4th_semantic_navigation.gif +3 -0
- demo/1.lobby_semantic_navigation.gif +3 -0
- demo/3.cyc4th_semantic_navigation.gif +3 -0
- ros_deployment_ws/src/semantic_cnn_nav/cnn_msgs/CMakeLists.txt +28 -0
- ros_deployment_ws/src/semantic_cnn_nav/cnn_msgs/msg/CNN_data.msg +5 -0
- ros_deployment_ws/src/semantic_cnn_nav/cnn_msgs/package.xml +17 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/CMakeLists.txt +199 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/launch/cnn_data_pub.launch +11 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/launch/semantic_cnn_inference.launch +11 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/launch/semantic_cnn_nav_gazebo.launch +50 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/package.xml +64 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/cnn_model.cpython-38.pyc +0 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/custom_cnn_full.cpython-38.pyc +0 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/model.cpython-38.pyc +0 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/pfeiffer_model.cpython-38.pyc +0 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/cnn_data_pub.py +108 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/cnn_model.py +783 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/goal_visualize.py +40 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/model/s3_net_model.pth +3 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/model/semantic_cnn_model.pth +3 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/pure_pursuit.py +254 -0
- ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/semantic_cnn_nav_inference.py +256 -0
- training/model/semantic_cnn_model.pth +3 -0
- training/run_eval.sh +63 -0
- training/run_train.sh +73 -0
- training/scripts/__pycache__/model.cpython-38.pyc +0 -0
- training/scripts/decode_demo.py +201 -0
- training/scripts/model.py +475 -0
- training/scripts/train.py +385 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
demo/1.eng4th_semantic_navigation.gif filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
demo/1.lobby_semantic_navigation.gif filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
demo/3.cyc4th_semantic_navigation.gif filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 Temple Robotics and Artificial Intelligence Lab
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
CHANGED
|
@@ -1,3 +1,342 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Semantic2D: Enabling Semantic Scene Understanding with 2D Lidar Alone
|
| 2 |
+
|
| 3 |
+
Semantic CNN Navigation implementation code for our paper ["Semantic2D: Enabling Semantic Scene Understanding with 2D Lidar Alone"](https://arxiv.org/pdf/2409.09899).
|
| 4 |
+
Video demos can be found at [multimedia demonstrations](https://youtu.be/P1Hsvj6WUSY).
|
| 5 |
+
The Semantic2D dataset can be found and downloaded at: https://doi.org/10.5281/zenodo.18350696.
|
| 6 |
+
|
| 7 |
+
## Related Resources
|
| 8 |
+
|
| 9 |
+
- **Dataset Download:** https://doi.org/10.5281/zenodo.18350696
|
| 10 |
+
- **SALSA (Dataset and Labeling Framework):** https://github.com/TempleRAIL/semantic2d
|
| 11 |
+
- **S³-Net (Stochastic Semantic Segmentation):** https://github.com/TempleRAIL/s3_net
|
| 12 |
+
- **Semantic CNN Navigation:** https://github.com/TempleRAIL/semantic_cnn_nav
|
| 13 |
+
|
| 14 |
+
## Overview
|
| 15 |
+
|
| 16 |
+
[](https://opensource.org/licenses/MIT)
|
| 17 |
+
|
| 18 |
+
This repository contains two main components:
|
| 19 |
+
1. **Training**: CNN-based control policy training using the Semantic2D dataset
|
| 20 |
+
2. **ROS Deployment**: Real-time semantic-aware navigation for mobile robots
|
| 21 |
+
|
| 22 |
+
The Semantic CNN Navigation system combines:
|
| 23 |
+
- **S³-Net**: Real-time semantic segmentation of 2D LiDAR scans
|
| 24 |
+
- **SemanticCNN**: ResNet-based control policy that uses semantic information for navigation
|
| 25 |
+
|
| 26 |
+
## Demo Results
|
| 27 |
+
|
| 28 |
+
**Engineering Lobby Semantic Navigation**
|
| 29 |
+

|
| 30 |
+
|
| 31 |
+
**Engineering 4th Floor Semantic Navigation**
|
| 32 |
+

|
| 33 |
+
|
| 34 |
+
**CYC 4th Floor Semantic Navigation**
|
| 35 |
+

|
| 36 |
+
|
| 37 |
+
## System Architecture
|
| 38 |
+
|
| 39 |
+
```
|
| 40 |
+
┌─────────────────────────────────────────────────────────────────────┐
|
| 41 |
+
│ Semantic CNN Navigation │
|
| 42 |
+
├─────────────────────────────────────────────────────────────────────┤
|
| 43 |
+
│ │
|
| 44 |
+
│ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────────┐ │
|
| 45 |
+
│ │ LiDAR Scan │───▶│ S³-Net │───▶│ Semantic Labels (10) │ │
|
| 46 |
+
│ │ + Intensity│ │ Segmentation│ │ per LiDAR point │ │
|
| 47 |
+
│ └─────────────┘ └─────────────┘ └───────────┬─────────────┘ │
|
| 48 |
+
│ │ │
|
| 49 |
+
│ ┌─────────────┐ ▼ │
|
| 50 |
+
│ │ Sub-Goal │───────────────────────▶┌─────────────────────────┐ │
|
| 51 |
+
│ │ (x, y) │ │ SemanticCNN │ │
|
| 52 |
+
│ └─────────────┘ │ (ResNet + Bottleneck) │ │
|
| 53 |
+
│ │ │ │
|
| 54 |
+
│ ┌─────────────┐ │ Input: 80x80 scan map │ │
|
| 55 |
+
│ │ Scan Map │───────────────────────▶│ + semantic map │ │
|
| 56 |
+
│ │ (history) │ │ + sub-goal │ │
|
| 57 |
+
│ └─────────────┘ └───────────┬─────────────┘ │
|
| 58 |
+
│ │ │
|
| 59 |
+
│ ▼ │
|
| 60 |
+
│ ┌─────────────────────────┐ │
|
| 61 |
+
│ │ Velocity Command │ │
|
| 62 |
+
│ │ (linear_x, angular_z) │ │
|
| 63 |
+
│ └─────────────────────────┘ │
|
| 64 |
+
└─────────────────────────────────────────────────────────────────────┘
|
| 65 |
+
```
|
| 66 |
+
|
| 67 |
+
## Requirements
|
| 68 |
+
|
| 69 |
+
### Training
|
| 70 |
+
- Python 3.7+
|
| 71 |
+
- PyTorch 1.7.1+
|
| 72 |
+
- TensorBoard
|
| 73 |
+
- NumPy
|
| 74 |
+
- tqdm
|
| 75 |
+
|
| 76 |
+
### ROS Deployment
|
| 77 |
+
- Ubuntu 20.04
|
| 78 |
+
- ROS Noetic
|
| 79 |
+
- Python 3.8.5
|
| 80 |
+
- PyTorch 1.7.1
|
| 81 |
+
|
| 82 |
+
Install training dependencies:
|
| 83 |
+
```bash
|
| 84 |
+
pip install torch torchvision tensorboardX numpy tqdm
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
---
|
| 88 |
+
|
| 89 |
+
# Part 1: Training
|
| 90 |
+
|
| 91 |
+
## Dataset Structure
|
| 92 |
+
|
| 93 |
+
The training expects the Semantic2D dataset organized as follows:
|
| 94 |
+
|
| 95 |
+
```
|
| 96 |
+
~/semantic2d_data/
|
| 97 |
+
├── dataset.txt # List of dataset folders
|
| 98 |
+
├── 2024-04-11-15-24-29/ # Dataset folder 1
|
| 99 |
+
│ ├── train.txt # Training sample list
|
| 100 |
+
│ ├── dev.txt # Validation sample list
|
| 101 |
+
│ ├── scans_lidar/ # Range scans (.npy)
|
| 102 |
+
│ ├── semantic_label/ # Semantic labels (.npy)
|
| 103 |
+
│ ├── sub_goals_local/ # Local sub-goals (.npy)
|
| 104 |
+
│ └── velocities/ # Ground truth velocities (.npy)
|
| 105 |
+
└── ...
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
## Model Architecture
|
| 109 |
+
|
| 110 |
+
**SemanticCNN** uses a ResNet-style architecture with Bottleneck blocks:
|
| 111 |
+
|
| 112 |
+
| Component | Details |
|
| 113 |
+
|-----------|---------|
|
| 114 |
+
| **Input** | 2 channels: scan map (80x80) + semantic map (80x80) |
|
| 115 |
+
| **Backbone** | ResNet with Bottleneck blocks [2, 1, 1] |
|
| 116 |
+
| **Goal Input** | 2D sub-goal (x, y) concatenated after pooling |
|
| 117 |
+
| **Output** | 2D velocity (linear_x, angular_z) |
|
| 118 |
+
| **Loss** | MSE Loss |
|
| 119 |
+
|
| 120 |
+
**Key Parameters:**
|
| 121 |
+
- Sequence length: 10 frames
|
| 122 |
+
- Image size: 80x80
|
| 123 |
+
- LiDAR points: 1081 → downsampled to 720 (removing ±180 points)
|
| 124 |
+
|
| 125 |
+
## Training
|
| 126 |
+
|
| 127 |
+
Train the Semantic CNN model:
|
| 128 |
+
|
| 129 |
+
```bash
|
| 130 |
+
cd training
|
| 131 |
+
sh run_train.sh ~/semantic2d_data/ ~/semantic2d_data/
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
**Arguments:**
|
| 135 |
+
- `$1` - Training data directory
|
| 136 |
+
- `$2` - Validation data directory
|
| 137 |
+
|
| 138 |
+
**Training Configuration** (in `scripts/train.py`):
|
| 139 |
+
|
| 140 |
+
| Parameter | Default | Description |
|
| 141 |
+
|-----------|---------|-------------|
|
| 142 |
+
| `NUM_EPOCHS` | 4000 | Total training epochs |
|
| 143 |
+
| `BATCH_SIZE` | 64 | Samples per batch |
|
| 144 |
+
| `LEARNING_RATE` | 0.001 | Initial learning rate |
|
| 145 |
+
|
| 146 |
+
**Learning Rate Schedule:**
|
| 147 |
+
- Epochs 0-40: `1e-3`
|
| 148 |
+
- Epochs 40-2000: `2e-4`
|
| 149 |
+
- Epochs 2000-21000: `2e-5`
|
| 150 |
+
- Epochs 21000+: `1e-5`
|
| 151 |
+
|
| 152 |
+
Model checkpoints saved every 50 epochs to `./model/`.
|
| 153 |
+
|
| 154 |
+
## Evaluation
|
| 155 |
+
|
| 156 |
+
Evaluate the trained model:
|
| 157 |
+
|
| 158 |
+
```bash
|
| 159 |
+
cd training
|
| 160 |
+
sh run_eval.sh ~/semantic2d_data/
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
**Output:** Results saved to `./output/`
|
| 164 |
+
|
| 165 |
+
## Training File Structure
|
| 166 |
+
|
| 167 |
+
```
|
| 168 |
+
training/
|
| 169 |
+
├── model/
|
| 170 |
+
│ └── semantic_cnn_model.pth # Pretrained model weights
|
| 171 |
+
├── scripts/
|
| 172 |
+
│ ├── model.py # SemanticCNN architecture + NavDataset
|
| 173 |
+
│ ├── train.py # Training script
|
| 174 |
+
│ └── decode_demo.py # Evaluation/demo script
|
| 175 |
+
├── run_train.sh # Training driver script
|
| 176 |
+
└── run_eval.sh # Evaluation driver script
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
---
|
| 180 |
+
|
| 181 |
+
## TensorBoard Monitoring
|
| 182 |
+
|
| 183 |
+
Training logs are saved to `./runs/`. View training progress:
|
| 184 |
+
|
| 185 |
+
```bash
|
| 186 |
+
cd training
|
| 187 |
+
tensorboard --logdir=runs
|
| 188 |
+
```
|
| 189 |
+
|
| 190 |
+
Monitored metrics:
|
| 191 |
+
- Training loss
|
| 192 |
+
- Validation loss
|
| 193 |
+
|
| 194 |
+
---
|
| 195 |
+
|
| 196 |
+
# Part 2: ROS Deployment
|
| 197 |
+
|
| 198 |
+
## Prerequisites
|
| 199 |
+
|
| 200 |
+
Install the following ROS packages:
|
| 201 |
+
|
| 202 |
+
```bash
|
| 203 |
+
# Create catkin workspace
|
| 204 |
+
mkdir -p ~/catkin_ws/src
|
| 205 |
+
cd ~/catkin_ws/src
|
| 206 |
+
|
| 207 |
+
# Clone required packages
|
| 208 |
+
git clone https://github.com/TempleRAIL/robot_gazebo.git
|
| 209 |
+
git clone https://github.com/TempleRAIL/pedsim_ros_with_gazebo.git
|
| 210 |
+
|
| 211 |
+
# Build
|
| 212 |
+
cd ~/catkin_ws
|
| 213 |
+
catkin_make
|
| 214 |
+
source devel/setup.bash
|
| 215 |
+
```
|
| 216 |
+
|
| 217 |
+
## Installation
|
| 218 |
+
|
| 219 |
+
1. Copy the ROS workspace to your catkin workspace:
|
| 220 |
+
```bash
|
| 221 |
+
cp -r ros_deployment_ws/src/semantic_cnn_nav ~/catkin_ws/src/
|
| 222 |
+
```
|
| 223 |
+
|
| 224 |
+
2. Build the workspace:
|
| 225 |
+
```bash
|
| 226 |
+
cd ~/catkin_ws
|
| 227 |
+
catkin_make
|
| 228 |
+
source devel/setup.bash
|
| 229 |
+
```
|
| 230 |
+
|
| 231 |
+
## Usage
|
| 232 |
+
|
| 233 |
+
### Launch Gazebo Simulation
|
| 234 |
+
|
| 235 |
+
```bash
|
| 236 |
+
roslaunch semantic_cnn_nav semantic_cnn_nav_gazebo.launch
|
| 237 |
+
```
|
| 238 |
+
|
| 239 |
+
This launch file starts:
|
| 240 |
+
- Gazebo simulator with pedestrians (pedsim)
|
| 241 |
+
- AMCL localization
|
| 242 |
+
- CNN data publisher
|
| 243 |
+
- Semantic CNN inference node
|
| 244 |
+
- RViz visualization
|
| 245 |
+
|
| 246 |
+
### Launch Configuration
|
| 247 |
+
|
| 248 |
+
Key parameters in `semantic_cnn_nav_gazebo.launch`:
|
| 249 |
+
|
| 250 |
+
| Parameter | Default | Description |
|
| 251 |
+
|-----------|---------|-------------|
|
| 252 |
+
| `s3_net_model_file` | `model/s3_net_model.pth` | S³-Net model path |
|
| 253 |
+
| `semantic_cnn_model_file` | `model/semantic_cnn_model.pth` | SemanticCNN model path |
|
| 254 |
+
| `scene_file` | `eng_hall_5.xml` | Pedsim scenario file |
|
| 255 |
+
| `world_name` | `eng_hall.world` | Gazebo world file |
|
| 256 |
+
| `map_file` | `gazebo_eng_lobby.yaml` | Navigation map |
|
| 257 |
+
| `initial_pose_x/y/a` | 1.0, 0.0, 0.13 | Robot initial pose |
|
| 258 |
+
|
| 259 |
+
### Send Navigation Goals
|
| 260 |
+
|
| 261 |
+
Use RViz "2D Nav Goal" tool to send navigation goals to the robot.
|
| 262 |
+
|
| 263 |
+
## ROS Nodes
|
| 264 |
+
|
| 265 |
+
### cnn_data_pub
|
| 266 |
+
Publishes processed LiDAR data for the CNN.
|
| 267 |
+
|
| 268 |
+
**Subscriptions:**
|
| 269 |
+
- `/scan` (sensor_msgs/LaserScan)
|
| 270 |
+
|
| 271 |
+
**Publications:**
|
| 272 |
+
- `/cnn_data` (cnn_msgs/CNN_data)
|
| 273 |
+
|
| 274 |
+
### semantic_cnn_nav_inference
|
| 275 |
+
Main inference node combining S³-Net and SemanticCNN.
|
| 276 |
+
|
| 277 |
+
**Subscriptions:**
|
| 278 |
+
- `/cnn_data` (cnn_msgs/CNN_data)
|
| 279 |
+
|
| 280 |
+
**Publications:**
|
| 281 |
+
- `/navigation_velocity_smoother/raw_cmd_vel` (geometry_msgs/Twist)
|
| 282 |
+
|
| 283 |
+
**Parameters:**
|
| 284 |
+
- `~s3_net_model_file`: Path to S³-Net model
|
| 285 |
+
- `~semantic_cnn_model_file`: Path to SemanticCNN model
|
| 286 |
+
|
| 287 |
+
## ROS Deployment File Structure
|
| 288 |
+
|
| 289 |
+
```
|
| 290 |
+
ros_deployment_ws/
|
| 291 |
+
└── src/
|
| 292 |
+
└── semantic_cnn_nav/
|
| 293 |
+
├── cnn_msgs/
|
| 294 |
+
│ └── msg/
|
| 295 |
+
│ └── CNN_data.msg # Custom message definition
|
| 296 |
+
└── semantic_cnn/
|
| 297 |
+
├── launch/
|
| 298 |
+
│ ├── cnn_data_pub.launch
|
| 299 |
+
│ ├── semantic_cnn_inference.launch
|
| 300 |
+
│ └── semantic_cnn_nav_gazebo.launch
|
| 301 |
+
└── src/
|
| 302 |
+
├── model/
|
| 303 |
+
│ ├── s3_net_model.pth # S³-Net pretrained weights
|
| 304 |
+
│ └── semantic_cnn_model.pth # SemanticCNN weights
|
| 305 |
+
├── cnn_data_pub.py # Data preprocessing node
|
| 306 |
+
├── cnn_model.py # Model definitions
|
| 307 |
+
├── pure_pursuit.py # Pure pursuit controller
|
| 308 |
+
├── goal_visualize.py # Goal visualization
|
| 309 |
+
└── semantic_cnn_nav_inference.py # Main inference node
|
| 310 |
+
```
|
| 311 |
+
---
|
| 312 |
+
|
| 313 |
+
## Pre-trained Models
|
| 314 |
+
|
| 315 |
+
Pre-trained models are included:
|
| 316 |
+
|
| 317 |
+
| Model | Location | Description |
|
| 318 |
+
|-------|----------|-------------|
|
| 319 |
+
| `s3_net_model.pth` | `ros_deployment_ws/.../model/` | S³-Net semantic segmentation |
|
| 320 |
+
| `semantic_cnn_model.pth` | `training/model/` | SemanticCNN navigation policy |
|
| 321 |
+
|
| 322 |
+
---
|
| 323 |
+
|
| 324 |
+
## Citation
|
| 325 |
+
|
| 326 |
+
```bibtex
|
| 327 |
+
@article{xie2026semantic2d,
|
| 328 |
+
title={Semantic2D: Enabling Semantic Scene Understanding with 2D Lidar Alone},
|
| 329 |
+
author={Xie, Zhanteng and Pan, Yipeng and Zhang, Yinqiang and Pan, Jia and Dames, Philip},
|
| 330 |
+
journal={arXiv preprint arXiv:2409.09899},
|
| 331 |
+
year={2026}
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
@inproceedings{xie2021towards,
|
| 335 |
+
title={Towards Safe Navigation Through Crowded Dynamic Environments},
|
| 336 |
+
author={Xie, Zhanteng and Xin, Pujie and Dames, Philip},
|
| 337 |
+
booktitle={2021 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)},
|
| 338 |
+
year={2021},
|
| 339 |
+
doi={10.1109/IROS51168.2021.9636102}
|
| 340 |
+
}
|
| 341 |
+
```
|
| 342 |
+
|
demo/1.eng4th_semantic_navigation.gif
ADDED
|
Git LFS Details
|
demo/1.lobby_semantic_navigation.gif
ADDED
|
Git LFS Details
|
demo/3.cyc4th_semantic_navigation.gif
ADDED
|
Git LFS Details
|
ros_deployment_ws/src/semantic_cnn_nav/cnn_msgs/CMakeLists.txt
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cmake_minimum_required(VERSION 2.8.12)
|
| 2 |
+
|
| 3 |
+
project(cnn_msgs)
|
| 4 |
+
|
| 5 |
+
set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
|
| 6 |
+
|
| 7 |
+
find_package(catkin REQUIRED COMPONENTS
|
| 8 |
+
roscpp
|
| 9 |
+
rospy
|
| 10 |
+
std_msgs
|
| 11 |
+
message_generation
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
add_message_files(
|
| 15 |
+
FILES
|
| 16 |
+
CNN_data.msg
|
| 17 |
+
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
generate_messages(
|
| 21 |
+
DEPENDENCIES
|
| 22 |
+
std_msgs
|
| 23 |
+
)
|
| 24 |
+
|
| 25 |
+
catkin_package(
|
| 26 |
+
CATKIN_DEPENDS
|
| 27 |
+
std_msgs
|
| 28 |
+
)
|
ros_deployment_ws/src/semantic_cnn_nav/cnn_msgs/msg/CNN_data.msg
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
float32[] scan_his # 1081 range data from the laser scan
|
| 2 |
+
float32[] intensity_his # 1081 intensity data from the laser scan
|
| 3 |
+
float32[] scan # 1081 range data from the laser scan
|
| 4 |
+
float32[] goal # current goal in robot frame
|
| 5 |
+
float32[] vel # current velocity in robot frame
|
ros_deployment_ws/src/semantic_cnn_nav/cnn_msgs/package.xml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0"?>
|
| 2 |
+
<package>
|
| 3 |
+
<name>cnn_msgs</name>
|
| 4 |
+
<version>1.1.3</version>
|
| 5 |
+
<description>CNN messages.</description>
|
| 6 |
+
<maintainer email="zzuxzt@to.do">zzuxzt</maintainer>
|
| 7 |
+
<license>BSD</license>
|
| 8 |
+
<url type="website">https://github.com/zzuxzt</url>
|
| 9 |
+
<author email="zzuxzt@to.do">zzuxzt</author>
|
| 10 |
+
|
| 11 |
+
<buildtool_depend>catkin</buildtool_depend>
|
| 12 |
+
|
| 13 |
+
<build_depend>message_generation</build_depend>
|
| 14 |
+
<build_depend>std_msgs</build_depend>
|
| 15 |
+
|
| 16 |
+
<run_depend>std_msgs</run_depend>
|
| 17 |
+
</package>
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/CMakeLists.txt
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cmake_minimum_required(VERSION 2.8.3)
|
| 2 |
+
project(semantic_cnn_nav)
|
| 3 |
+
|
| 4 |
+
## Compile as C++11, supported in ROS Kinetic and newer
|
| 5 |
+
# add_compile_options(-std=c++11)
|
| 6 |
+
|
| 7 |
+
## Find catkin macros and libraries
|
| 8 |
+
## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
|
| 9 |
+
## is used, also find other catkin packages
|
| 10 |
+
find_package(catkin REQUIRED COMPONENTS
|
| 11 |
+
roscpp
|
| 12 |
+
rospy
|
| 13 |
+
)
|
| 14 |
+
#add_executable(start_learning scripts/start_learning.py)
|
| 15 |
+
|
| 16 |
+
## System dependencies are found with CMake's conventions
|
| 17 |
+
# find_package(Boost REQUIRED COMPONENTS system)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
## Uncomment this if the package has a setup.py. This macro ensures
|
| 21 |
+
## modules and global scripts declared therein get installed
|
| 22 |
+
## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
|
| 23 |
+
# catkin_python_setup()
|
| 24 |
+
|
| 25 |
+
################################################
|
| 26 |
+
## Declare ROS messages, services and actions ##
|
| 27 |
+
################################################
|
| 28 |
+
|
| 29 |
+
## To declare and build messages, services or actions from within this
|
| 30 |
+
## package, follow these steps:
|
| 31 |
+
## * Let MSG_DEP_SET be the set of packages whose message types you use in
|
| 32 |
+
## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
|
| 33 |
+
## * In the file package.xml:
|
| 34 |
+
## * add a build_depend tag for "message_generation"
|
| 35 |
+
## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
|
| 36 |
+
## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
|
| 37 |
+
## but can be declared for certainty nonetheless:
|
| 38 |
+
## * add a exec_depend tag for "message_runtime"
|
| 39 |
+
## * In this file (CMakeLists.txt):
|
| 40 |
+
## * add "message_generation" and every package in MSG_DEP_SET to
|
| 41 |
+
## find_package(catkin REQUIRED COMPONENTS ...)
|
| 42 |
+
## * add "message_runtime" and every package in MSG_DEP_SET to
|
| 43 |
+
## catkin_package(CATKIN_DEPENDS ...)
|
| 44 |
+
## * uncomment the add_*_files sections below as needed
|
| 45 |
+
## and list every .msg/.srv/.action file to be processed
|
| 46 |
+
## * uncomment the generate_messages entry below
|
| 47 |
+
## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
|
| 48 |
+
|
| 49 |
+
## Generate messages in the 'msg' folder
|
| 50 |
+
# add_message_files(
|
| 51 |
+
# FILES
|
| 52 |
+
# Message1.msg
|
| 53 |
+
# Message2.msg
|
| 54 |
+
# )
|
| 55 |
+
|
| 56 |
+
## Generate services in the 'srv' folder
|
| 57 |
+
# add_service_files(
|
| 58 |
+
# FILES
|
| 59 |
+
# Service1.srv
|
| 60 |
+
# Service2.srv
|
| 61 |
+
# )
|
| 62 |
+
|
| 63 |
+
## Generate actions in the 'action' folder
|
| 64 |
+
# add_action_files(
|
| 65 |
+
# FILES
|
| 66 |
+
# Action1.action
|
| 67 |
+
# Action2.action
|
| 68 |
+
# )
|
| 69 |
+
|
| 70 |
+
## Generate added messages and services with any dependencies listed here
|
| 71 |
+
# generate_messages(
|
| 72 |
+
# DEPENDENCIES
|
| 73 |
+
# std_msgs # Or other packages containing msgs
|
| 74 |
+
# )
|
| 75 |
+
|
| 76 |
+
################################################
|
| 77 |
+
## Declare ROS dynamic reconfigure parameters ##
|
| 78 |
+
################################################
|
| 79 |
+
|
| 80 |
+
## To declare and build dynamic reconfigure parameters within this
|
| 81 |
+
## package, follow these steps:
|
| 82 |
+
## * In the file package.xml:
|
| 83 |
+
## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
|
| 84 |
+
## * In this file (CMakeLists.txt):
|
| 85 |
+
## * add "dynamic_reconfigure" to
|
| 86 |
+
## find_package(catkin REQUIRED COMPONENTS ...)
|
| 87 |
+
## * uncomment the "generate_dynamic_reconfigure_options" section below
|
| 88 |
+
## and list every .cfg file to be processed
|
| 89 |
+
|
| 90 |
+
## Generate dynamic reconfigure parameters in the 'cfg' folder
|
| 91 |
+
# generate_dynamic_reconfigure_options(
|
| 92 |
+
# cfg/DynReconf1.cfg
|
| 93 |
+
# cfg/DynReconf2.cfg
|
| 94 |
+
# )
|
| 95 |
+
|
| 96 |
+
###################################
|
| 97 |
+
## catkin specific configuration ##
|
| 98 |
+
###################################
|
| 99 |
+
## The catkin_package macro generates cmake config files for your package
|
| 100 |
+
## Declare things to be passed to dependent projects
|
| 101 |
+
## INCLUDE_DIRS: uncomment this if your package contains header files
|
| 102 |
+
## LIBRARIES: libraries you create in this project that dependent projects also need
|
| 103 |
+
## CATKIN_DEPENDS: catkin_packages dependent projects also need
|
| 104 |
+
## DEPENDS: system dependencies of this project that dependent projects also need
|
| 105 |
+
catkin_package(
|
| 106 |
+
# INCLUDE_DIRS include
|
| 107 |
+
# LIBRARIES cablearning
|
| 108 |
+
# CATKIN_DEPENDS other_catkin_pkg
|
| 109 |
+
# DEPENDS system_lib
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
+
###########
|
| 113 |
+
## Build ##
|
| 114 |
+
###########
|
| 115 |
+
|
| 116 |
+
## Specify additional locations of header files
|
| 117 |
+
## Your package locations should be listed before other locations
|
| 118 |
+
include_directories(
|
| 119 |
+
# include
|
| 120 |
+
${catkin_INCLUDE_DIRS}
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
## Declare a C++ library
|
| 124 |
+
# add_library(${PROJECT_NAME}
|
| 125 |
+
# src/${PROJECT_NAME}/cablearning.cpp
|
| 126 |
+
# )
|
| 127 |
+
|
| 128 |
+
## Add cmake target dependencies of the library
|
| 129 |
+
## as an example, code may need to be generated before libraries
|
| 130 |
+
## either from message generation or dynamic reconfigure
|
| 131 |
+
# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
|
| 132 |
+
|
| 133 |
+
## Declare a C++ executable
|
| 134 |
+
## With catkin_make all packages are built within a single CMake context
|
| 135 |
+
## The recommended prefix ensures that target names across packages don't collide
|
| 136 |
+
# add_executable(${PROJECT_NAME}_node src/cablearning_node.cpp)
|
| 137 |
+
|
| 138 |
+
## Rename C++ executable without prefix
|
| 139 |
+
## The above recommended prefix causes long target names, the following renames the
|
| 140 |
+
## target back to the shorter version for ease of user use
|
| 141 |
+
## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
|
| 142 |
+
# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
|
| 143 |
+
|
| 144 |
+
## Add cmake target dependencies of the executable
|
| 145 |
+
## same as for the library above
|
| 146 |
+
# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
|
| 147 |
+
|
| 148 |
+
## Specify libraries to link a library or executable target against
|
| 149 |
+
# target_link_libraries(${PROJECT_NAME}_node
|
| 150 |
+
# ${catkin_LIBRARIES}
|
| 151 |
+
# )
|
| 152 |
+
|
| 153 |
+
#############
|
| 154 |
+
## Install ##
|
| 155 |
+
#############
|
| 156 |
+
|
| 157 |
+
# all install targets should use catkin DESTINATION variables
|
| 158 |
+
# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
|
| 159 |
+
|
| 160 |
+
## Mark executable scripts (Python etc.) for installation
|
| 161 |
+
## in contrast to setup.py, you can choose the destination
|
| 162 |
+
# install(PROGRAMS
|
| 163 |
+
# scripts/my_python_script
|
| 164 |
+
# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
|
| 165 |
+
# )
|
| 166 |
+
|
| 167 |
+
## Mark executables and/or libraries for installation
|
| 168 |
+
# install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
|
| 169 |
+
# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
|
| 170 |
+
# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
|
| 171 |
+
# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
|
| 172 |
+
# )
|
| 173 |
+
|
| 174 |
+
## Mark cpp header files for installation
|
| 175 |
+
# install(DIRECTORY include/${PROJECT_NAME}/
|
| 176 |
+
# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
|
| 177 |
+
# FILES_MATCHING PATTERN "*.h"
|
| 178 |
+
# PATTERN ".svn" EXCLUDE
|
| 179 |
+
# )
|
| 180 |
+
|
| 181 |
+
## Mark other files for installation (e.g. launch and bag files, etc.)
|
| 182 |
+
# install(FILES
|
| 183 |
+
# # myfile1
|
| 184 |
+
# # myfile2
|
| 185 |
+
# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
|
| 186 |
+
# )
|
| 187 |
+
|
| 188 |
+
#############
|
| 189 |
+
## Testing ##
|
| 190 |
+
#############
|
| 191 |
+
|
| 192 |
+
## Add gtest based cpp test target and link libraries
|
| 193 |
+
# catkin_add_gtest(${PROJECT_NAME}-test test/test_cablearning.cpp)
|
| 194 |
+
# if(TARGET ${PROJECT_NAME}-test)
|
| 195 |
+
# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
|
| 196 |
+
# endif()
|
| 197 |
+
|
| 198 |
+
## Add folders to be run by python nosetests
|
| 199 |
+
# catkin_add_nosetests(test)
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/launch/cnn_data_pub.launch
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<launch>
|
| 2 |
+
<!-- Subgoal Publisher -->
|
| 3 |
+
<node name="pure_pursuit" pkg="semantic_cnn_nav" type="pure_pursuit.py" output="screen" required="true" />
|
| 4 |
+
|
| 5 |
+
<!-- CNN Data Publisher -->
|
| 6 |
+
<node name="cnn_data_pub" pkg="semantic_cnn_nav" type="cnn_data_pub.py"/>
|
| 7 |
+
|
| 8 |
+
<!-- Goal visualization -->
|
| 9 |
+
<node name="goal_visualize" pkg="semantic_cnn_nav" type="goal_visualize.py" output="screen" />
|
| 10 |
+
|
| 11 |
+
</launch>
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/launch/semantic_cnn_inference.launch
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<launch>
|
| 2 |
+
<arg name="s3_net_model_file" default="$(find semantic_cnn_nav)/src/model/s3_net_model.pth"/>
|
| 3 |
+
<arg name="semantic_cnn_model_file" default="$(find semantic_cnn_nav)/src/model/semantic_cnn_model.pth"/>
|
| 4 |
+
|
| 5 |
+
<!-- Semantic CNN control-policy publisher (node name "drl_vo_cmd" retained from DRL-VO) -->
|
| 6 |
+
<node name="drl_vo_cmd" pkg="semantic_cnn_nav" type="semantic_cnn_nav_inference.py" output="screen">
|
| 7 |
+
<param name="s3_net_model_file" value="$(arg s3_net_model_file)" type="string"/>
|
| 8 |
+
<param name="semantic_cnn_model_file" value="$(arg semantic_cnn_model_file)" type="string"/>
|
| 9 |
+
</node>
|
| 10 |
+
|
| 11 |
+
</launch>
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/launch/semantic_cnn_nav_gazebo.launch
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<launch>
|
| 2 |
+
<arg name="s3_net_model_file" default="$(find semantic_cnn_nav)/src/model/s3_net_model.pth"/>
|
| 3 |
+
<arg name="semantic_cnn_model_file" default="$(find semantic_cnn_nav)/src/model/semantic_cnn_model.pth"/>
|
| 4 |
+
|
| 5 |
+
<arg name="scene_file" default="$(find pedsim_simulator)/scenarios/lobby/eng_hall_5.xml"/>
|
| 6 |
+
<arg name="world_name" default="$(find pedsim_gazebo_plugin)/worlds/eng_hall.world"/>
|
| 7 |
+
<arg name="gui" default="true" doc="Bring up the Gazebo graphical interface"/>
|
| 8 |
+
<arg name="pose_initial_x" default="1.0"/>
|
| 9 |
+
<arg name="pose_initial_y" default="1.0"/>
|
| 10 |
+
<arg name="map_file" default="$(find robot_gazebo)/maps/gazebo_eng_lobby/gazebo_eng_lobby.yaml"/>
|
| 11 |
+
<arg name="initial_pose_x" default="1.0"/>
|
| 12 |
+
<arg name="initial_pose_y" default="0.0"/>
|
| 13 |
+
<arg name="initial_pose_a" default="0.13"/>
|
| 14 |
+
|
| 15 |
+
<!-- Output -->
|
| 16 |
+
<arg name="output" default="log"/>
|
| 17 |
+
<arg name="enable_opencv" default="true"/>
|
| 18 |
+
<arg name="enable_console_output" default="true"/>
|
| 19 |
+
<arg name="rviz" default="true"/>
|
| 20 |
+
|
| 21 |
+
<!-- Pedsim Gazebo -->
|
| 22 |
+
<include file="$(find pedsim_simulator)/launch/robot.launch">
|
| 23 |
+
<arg name="scene_file" value="$(arg scene_file)"/>
|
| 24 |
+
<arg name="world_name" value="$(arg world_name)"/>
|
| 25 |
+
<arg name="gui" value="$(arg gui)"/>
|
| 26 |
+
<arg name="pose_initial_x" value="$(arg pose_initial_x)"/>
|
| 27 |
+
<arg name="pose_initial_y" value="$(arg pose_initial_y)"/>
|
| 28 |
+
</include>
|
| 29 |
+
|
| 30 |
+
<!-- AMCL -->
|
| 31 |
+
<include file="$(find robot_gazebo)/launch/amcl_demo_drl.launch">
|
| 32 |
+
<arg name="map_file" value="$(arg map_file)"/>
|
| 33 |
+
<arg name="initial_pose_x" value="$(arg initial_pose_x)"/>
|
| 34 |
+
<arg name="initial_pose_y" value="$(arg initial_pose_y)"/>
|
| 35 |
+
<arg name="initial_pose_a" value="$(arg initial_pose_a)"/>
|
| 36 |
+
</include>
|
| 37 |
+
|
| 38 |
+
<!-- CNN DATA -->
|
| 39 |
+
<include file="$(find semantic_cnn_nav)/launch/cnn_data_pub.launch"/>
|
| 40 |
+
|
| 41 |
+
<!-- Semantic CNN Control Policy -->
|
| 42 |
+
<include file="$(find semantic_cnn_nav)/launch/semantic_cnn_inference.launch">
|
| 43 |
+
<arg name="s3_net_model_file" value="$(arg s3_net_model_file)"/>
|
| 44 |
+
<arg name="semantic_cnn_model_file" value="$(arg semantic_cnn_model_file)"/>
|
| 45 |
+
</include>
|
| 46 |
+
|
| 47 |
+
<!-- Rviz-->
|
| 48 |
+
<include file="$(find robot_gazebo)/launch/view_navigation.launch" if="$(arg rviz)"/>
|
| 49 |
+
|
| 50 |
+
</launch>
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/package.xml
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0"?>
|
| 2 |
+
<package format="2">
|
| 3 |
+
<name>semantic_cnn_nav</name>
|
| 4 |
+
<version>0.0.0</version>
|
| 5 |
+
<description>The semantic_cnn_nav package</description>
|
| 6 |
+
|
| 7 |
+
<!-- One maintainer tag required, multiple allowed, one person per tag -->
|
| 8 |
+
<!-- Example: -->
|
| 9 |
+
<!-- <maintainer email="jane.doe@example.com">Jane Doe</maintainer> -->
|
| 10 |
+
<maintainer email="xzt@todo.todo">xzt</maintainer>
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
<!-- One license tag required, multiple allowed, one license per tag -->
|
| 14 |
+
<!-- Commonly used license strings: -->
|
| 15 |
+
<!-- BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 -->
|
| 16 |
+
<license>MIT</license>
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
<!-- Url tags are optional, but multiple are allowed, one per tag -->
|
| 20 |
+
<!-- Optional attribute type can be: website, bugtracker, or repository -->
|
| 21 |
+
<!-- Example: -->
|
| 22 |
+
<!-- <url type="website">http://wiki.ros.org</url> -->
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
<!-- Author tags are optional, multiple are allowed, one per tag -->
|
| 26 |
+
<!-- Authors do not have to be maintainers, but could be -->
|
| 27 |
+
<!-- Example: -->
|
| 28 |
+
<!-- <author email="jane.doe@example.com">Jane Doe</author> -->
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
<!-- The *depend tags are used to specify dependencies -->
|
| 32 |
+
<!-- Dependencies can be catkin packages or system dependencies -->
|
| 33 |
+
<!-- Examples: -->
|
| 34 |
+
<!-- Use depend as a shortcut for packages that are both build and exec dependencies -->
|
| 35 |
+
<!-- <depend>roscpp</depend> -->
|
| 36 |
+
<!-- Note that this is equivalent to the following: -->
|
| 37 |
+
<!-- <build_depend>roscpp</build_depend> -->
|
| 38 |
+
<!-- <exec_depend>roscpp</exec_depend> -->
|
| 39 |
+
<!-- Use build_depend for packages you need at compile time: -->
|
| 40 |
+
<!-- <build_depend>message_generation</build_depend> -->
|
| 41 |
+
<!-- Use build_export_depend for packages you need in order to build against this package: -->
|
| 42 |
+
<!-- <build_export_depend>message_generation</build_export_depend> -->
|
| 43 |
+
<!-- Use buildtool_depend for build tool packages: -->
|
| 44 |
+
<!-- <buildtool_depend>catkin</buildtool_depend> -->
|
| 45 |
+
<!-- Use exec_depend for packages you need at runtime: -->
|
| 46 |
+
<!-- <exec_depend>message_runtime</exec_depend> -->
|
| 47 |
+
<!-- Use test_depend for packages you need only for testing: -->
|
| 48 |
+
<!-- <test_depend>gtest</test_depend> -->
|
| 49 |
+
<!-- Use doc_depend for packages you need only for building documentation: -->
|
| 50 |
+
<!-- <doc_depend>doxygen</doc_depend> -->
|
| 51 |
+
<buildtool_depend>catkin</buildtool_depend>
|
| 52 |
+
<build_depend>roscpp</build_depend>
|
| 53 |
+
<build_depend>rospy</build_depend>
|
| 54 |
+
<build_export_depend>roscpp</build_export_depend>
|
| 55 |
+
<build_export_depend>rospy</build_export_depend>
|
| 56 |
+
<exec_depend>roscpp</exec_depend>
|
| 57 |
+
<exec_depend>rospy</exec_depend>
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
<!-- The export tag contains other, unspecified, tags -->
|
| 61 |
+
<export>
|
| 62 |
+
<!-- Other tools can request additional information be placed here -->
|
| 63 |
+
</export>
|
| 64 |
+
</package>
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/cnn_model.cpython-38.pyc
ADDED
|
Binary file (18.6 kB). View file
|
|
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/custom_cnn_full.cpython-38.pyc
ADDED
|
Binary file (6.02 kB). View file
|
|
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/model.cpython-38.pyc
ADDED
|
Binary file (15.9 kB). View file
|
|
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/__pycache__/pfeiffer_model.cpython-38.pyc
ADDED
|
Binary file (12.8 kB). View file
|
|
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/cnn_data_pub.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
import numpy as np
|
| 3 |
+
import rospy
|
| 4 |
+
from geometry_msgs.msg import Point, PoseStamped, Twist, TwistStamped
|
| 5 |
+
from sensor_msgs.msg import LaserScan
|
| 6 |
+
# custom define messages:
|
| 7 |
+
from cnn_msgs.msg import CNN_data
|
| 8 |
+
|
| 9 |
+
# parameters:
NUM_TP = 10  # number of lidar timesteps accumulated before each CNN_data publish


class CnnData:
    """Accumulates lidar scan/intensity history plus the latest sub-goal and
    commanded velocity, and periodically publishes them as one CNN_data message.
    """

    # Constructor
    def __init__(self):
        # rolling history buffers (one list entry per timestep):
        self.scan_his = []
        self.intensity_his = []
        self.semantic_his = []  # NOTE(review): never written in this file — confirm if still needed

        # latest snapshots (1081 = 1080 lidar rays + 1 pad; see scan_callback):
        self.scan = np.zeros(1081)
        self.intensity = np.zeros(1081)
        self.goal = np.zeros(2)   # sub-goal (x, y)
        self.vel = np.zeros(2)    # (linear x, angular z)

        # timer:
        self.timer = None
        self.rate = 10  # accumulation/publish tick rate in Hz
        self.ts_cnt = 0  # timesteps accumulated so far (capped at NUM_TP)

        # initialize ROS objects
        self.scan_sub = rospy.Subscriber("scan", LaserScan, self.scan_callback)
        self.goal_sub = rospy.Subscriber("cnn_goal", Point, self.goal_callback)
        self.vel_sub = rospy.Subscriber("mobile_base/commands/velocity", Twist, self.vel_callback)
        self.cnn_data_pub = rospy.Publisher('cnn_data', CNN_data, queue_size=1, latch=False)

    # Callback function for the scan measurement subscriber
    def scan_callback(self, laserScan_msg):
        # get the laser scan data, clamping NaN/inf returns to the 30 m max range:
        scan_data = np.array(laserScan_msg.ranges, dtype=np.float32)
        scan_data[np.isnan(scan_data)] = 30.
        scan_data[np.isinf(scan_data)] = 30.
        # gazebo (pad to 1081 points):
        self.scan = np.concatenate((scan_data, [0]), axis=0)
        # real world (overrides the gazebo line above; comment out the variant
        # that does not apply to the deployment target):
        self.scan = scan_data

        intensity_data = np.array(laserScan_msg.intensities, dtype=np.float32)
        intensity_data[np.isnan(intensity_data)] = 0.
        intensity_data[np.isinf(intensity_data)] = 0.
        # gazebo:
        self.intensity = np.concatenate((intensity_data, [0]), axis=0)
        # real world:
        self.intensity = intensity_data

        # start the timer once the first scan has arrived
        if self.timer is None:
            self.start()

    # Callback function for the current goal subscriber
    def goal_callback(self, goal_msg):
        # sub-goal in Cartesian coordinates:
        self.goal = np.zeros(2)
        self.goal[0] = goal_msg.x
        self.goal[1] = goal_msg.y

    # Callback function for the velocity subscriber
    def vel_callback(self, vel_msg):
        self.vel = np.zeros(2)
        self.vel[0] = vel_msg.linear.x
        self.vel[1] = vel_msg.angular.z

    # Start the timer that accumulates timesteps and publishes CNN data
    def start(self):
        # initialize timer for periodic accumulation/publish
        self.timer = rospy.Timer(rospy.Duration(1./self.rate), self.timer_callback)

    # Timer callback: record one timestep; publish once NUM_TP are collected
    def timer_callback(self, event):
        # append the latest scan/intensity snapshot to the history buffers:
        self.scan_his.append(self.scan.tolist())
        self.intensity_his.append(self.intensity.tolist())

        self.ts_cnt = self.ts_cnt + 1
        if(self.ts_cnt == NUM_TP):
            # publish cnn data (histories flattened to flat float lists):
            cnn_data = CNN_data()
            cnn_data.scan_his = [float(val) for sublist in self.scan_his for val in sublist]
            cnn_data.intensity_his = [float(val) for sublist in self.intensity_his for val in sublist]
            cnn_data.scan = self.scan
            cnn_data.goal = self.goal
            cnn_data.vel = self.vel
            self.cnn_data_pub.publish(cnn_data)

            # slide the window: drop the oldest timestep so the next tick
            # completes a fresh NUM_TP-long history.
            # BUG FIX: the original sliced self.scan (the 1081-point scan
            # array) instead of the history lists, which corrupted the scan
            # and let scan_his/intensity_his grow without bound.
            self.ts_cnt = NUM_TP - 1
            self.scan_his = self.scan_his[1:NUM_TP]
            self.intensity_his = self.intensity_his[1:NUM_TP]
|
| 99 |
+
|
| 100 |
+
if __name__ == '__main__':
    try:
        # start the ROS node; all work happens in CnnData's subscriber/timer callbacks
        rospy.init_node('cnn_data')
        CnnData()
        # spin() simply keeps python from exiting until this node is stopped
        rospy.spin()
    except rospy.ROSInterruptException:
        # normal shutdown path (Ctrl-C / node kill) — nothing to clean up
        pass
|
| 108 |
+
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/cnn_model.py
ADDED
|
@@ -0,0 +1,783 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
#
|
| 3 |
+
# file: $ISIP_EXP/tuh_dpath/exp_0074/scripts/model.py
|
| 4 |
+
#
|
| 5 |
+
# revision history:
|
| 6 |
+
# 20190925 (TE): first version
|
| 7 |
+
#
|
| 8 |
+
# usage:
|
| 9 |
+
#
|
| 10 |
+
# This script hold the model architecture
|
| 11 |
+
#------------------------------------------------------------------------------
|
| 12 |
+
|
| 13 |
+
# import pytorch modules
|
| 14 |
+
#
|
| 15 |
+
import torch
|
| 16 |
+
import torch.nn as nn
|
| 17 |
+
import torch.nn.functional as F
|
| 18 |
+
import numpy as np
|
| 19 |
+
import numpy.matlib
|
| 20 |
+
|
| 21 |
+
# import modules
|
| 22 |
+
#
|
| 23 |
+
import os
|
| 24 |
+
import random
|
| 25 |
+
|
| 26 |
+
# for reproducibility, we seed the rng
#
# NOTE(review): SEED1 is not referenced in the visible portion of this file;
# presumably passed to set_seed() by callers elsewhere — confirm before removing.
SEED1 = 1337
# newline constant (also unused in the visible portion of this file)
NEW_LINE = "\n"
|
| 30 |
+
|
| 31 |
+
#-----------------------------------------------------------------------------
|
| 32 |
+
#
|
| 33 |
+
# helper functions are listed here
|
| 34 |
+
#
|
| 35 |
+
#-----------------------------------------------------------------------------
|
| 36 |
+
|
| 37 |
+
# function: set_seed
|
| 38 |
+
#
|
| 39 |
+
# arguments: seed - the seed for all the rng
|
| 40 |
+
#
|
| 41 |
+
# returns: none
|
| 42 |
+
#
|
| 43 |
+
# this method seeds all the random number generators and makes
|
| 44 |
+
# the results deterministic
|
| 45 |
+
#
|
| 46 |
+
def set_seed(seed):
    """Seed every RNG this stack uses and force deterministic cuDNN kernels.

    arguments: seed - the seed for all the rng
    returns: none

    Seeds torch (CPU and all CUDA devices), Python's random module, numpy's
    global RNG, and the hash seed, and disables cuDNN autotuning so repeated
    runs are reproducible.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(seed)
    # BUG FIX: numpy's global RNG was previously left unseeded even though
    # this module uses numpy throughout, so numpy-driven randomness was not
    # reproducible.
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
#
# end of method
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# calculate the angle of incidence of the lidar ray:
|
| 58 |
+
def angle_incidence_calculation(b, c, alpha, last_ray=False):
    """Compute the angle of incidence of a lidar ray via the law of cosines.

    b, c     : range readings of two adjacent rays (scalars or numpy arrays)
    alpha    : angular separation between the two rays
    last_ray : if True, use the triangle angle opposite side b (final ray of
               the scan); otherwise the angle opposite side c
    Returns a numpy array (leading axis of length 1, matching the original
    list-wrapped arccos call).
    """
    # third side of the triangle spanned by the two ray endpoints:
    a = np.sqrt(b * b + c * c - 2 * b * c * np.cos(alpha))
    if last_ray:
        # beta: interior angle opposite side b
        inner_angle = np.arccos([(a * a + c * c - b * b) / (2 * a * c)])
    else:
        # gamma: interior angle opposite side c
        inner_angle = np.arccos([(a * a + b * b - c * c) / (2 * a * b)])
    # incidence is measured relative to the surface normal:
    return np.abs(np.pi / 2 - inner_angle)
|
| 82 |
+
|
| 83 |
+
#------------------------------------------------------------------------------
|
| 84 |
+
#
|
| 85 |
+
# the S3-Net model is defined here
|
| 86 |
+
#
|
| 87 |
+
#------------------------------------------------------------------------------
|
| 88 |
+
# Constants
# three input channels: S3Net.forward concatenates the scan, intensity and
# angle-of-incidence streams along the channel axis
NUM_INPUT_CHANNELS = 3 #2
#NUM_LATENT_DIM = 512 # 16*16*2
NUM_OUTPUT_CHANNELS = 10 # 9 classes of semantic labels + 1 background
|
| 92 |
+
# define the PyTorch VAE model
|
| 93 |
+
#
|
| 94 |
+
# define a VAE
|
| 95 |
+
# Residual blocks:
|
| 96 |
+
class Residual(nn.Module):
|
| 97 |
+
def __init__(self, in_channels, num_hiddens, num_residual_hiddens):
|
| 98 |
+
super(Residual, self).__init__()
|
| 99 |
+
self._block = nn.Sequential(
|
| 100 |
+
nn.ReLU(True),
|
| 101 |
+
nn.Conv1d(in_channels=in_channels,
|
| 102 |
+
out_channels=num_residual_hiddens,
|
| 103 |
+
kernel_size=3, stride=1, padding=1, bias=False),
|
| 104 |
+
nn.BatchNorm1d(num_residual_hiddens),
|
| 105 |
+
nn.ReLU(True),
|
| 106 |
+
nn.Conv1d(in_channels=num_residual_hiddens,
|
| 107 |
+
out_channels=num_hiddens,
|
| 108 |
+
kernel_size=1, stride=1, bias=False),
|
| 109 |
+
nn.BatchNorm1d(num_hiddens)
|
| 110 |
+
)
|
| 111 |
+
|
| 112 |
+
def forward(self, x):
|
| 113 |
+
return x + self._block(x)
|
| 114 |
+
|
| 115 |
+
class ResidualStack(nn.Module):
    """A chain of `num_residual_layers` Residual blocks with a final ReLU."""

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super(ResidualStack, self).__init__()
        self._num_residual_layers = num_residual_layers
        blocks = [
            Residual(in_channels, num_hiddens, num_residual_hiddens)
            for _ in range(num_residual_layers)
        ]
        self._layers = nn.ModuleList(blocks)

    def forward(self, x):
        # run the blocks back to back
        for layer in self._layers:
            x = layer(x)
        # Residual blocks end in BatchNorm, so apply the activation here
        return F.relu(x)
|
| 126 |
+
|
| 127 |
+
# Encoder & Decoder Architecture:
|
| 128 |
+
# Encoder:
|
| 129 |
+
class Encoder(nn.Module):
    """1-D convolutional encoder: two stride-2 stages (4x temporal downsample)
    followed by a residual stack."""

    def __init__(self, in_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super(Encoder, self).__init__()
        half_hiddens = num_hiddens // 2
        # stage 1: halve the temporal resolution
        self._conv_1 = nn.Sequential(
            nn.Conv1d(in_channels=in_channels,
                      out_channels=half_hiddens,
                      kernel_size=4,
                      stride=2,
                      padding=1),
            nn.BatchNorm1d(half_hiddens),
            nn.ReLU(True),
        )
        # stage 2: halve again (no trailing ReLU — the residual stack opens with one)
        self._conv_2 = nn.Sequential(
            nn.Conv1d(in_channels=half_hiddens,
                      out_channels=num_hiddens,
                      kernel_size=4,
                      stride=2,
                      padding=1),
            nn.BatchNorm1d(num_hiddens),
        )
        self._residual_stack = ResidualStack(in_channels=num_hiddens,
                                             num_hiddens=num_hiddens,
                                             num_residual_layers=num_residual_layers,
                                             num_residual_hiddens=num_residual_hiddens)

    def forward(self, inputs):
        hidden = self._conv_1(inputs)
        hidden = self._conv_2(hidden)
        return self._residual_stack(hidden)
|
| 160 |
+
|
| 161 |
+
# Decoder:
|
| 162 |
+
class Decoder(nn.Module):
    """1-D decoder: residual stack, then two transposed-conv upsampling stages
    and a final 3-wide conv projecting to `out_channels` (raw logits, no
    output activation)."""

    def __init__(self, out_channels, num_hiddens, num_residual_layers, num_residual_hiddens):
        super(Decoder, self).__init__()

        half_hiddens = num_hiddens // 2

        self._residual_stack = ResidualStack(in_channels=num_hiddens,
                                             num_hiddens=num_hiddens,
                                             num_residual_layers=num_residual_layers,
                                             num_residual_hiddens=num_residual_hiddens)

        # first upsampling stage (x2):
        self._conv_trans_2 = nn.Sequential(
            nn.ReLU(True),
            nn.ConvTranspose1d(in_channels=num_hiddens,
                               out_channels=half_hiddens,
                               kernel_size=4,
                               stride=2,
                               padding=1),
            nn.BatchNorm1d(half_hiddens),
            nn.ReLU(True),
        )

        # second upsampling stage (x2, plus one extra sample via
        # output_padding=1) and the output projection:
        self._conv_trans_1 = nn.Sequential(
            nn.ConvTranspose1d(in_channels=half_hiddens,
                               out_channels=half_hiddens,
                               kernel_size=4,
                               stride=2,
                               padding=1,
                               output_padding=1),
            nn.BatchNorm1d(half_hiddens),
            nn.ReLU(True),
            nn.Conv1d(in_channels=half_hiddens,
                      out_channels=out_channels,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            # no final activation: callers apply softmax/sigmoid themselves
        )

    def forward(self, inputs):
        hidden = self._residual_stack(inputs)
        hidden = self._conv_trans_2(hidden)
        return self._conv_trans_1(hidden)
|
| 204 |
+
|
| 205 |
+
class VAE_Encoder(nn.Module):
    """Encoder half of the VAE: the shared Encoder backbone plus two 1x1 conv
    heads producing the latent mean and log-standard-deviation maps."""

    def __init__(self, input_channel, num_hiddens, num_residual_layers, num_residual_hiddens, embedding_dim):
        super(VAE_Encoder, self).__init__()
        # channel count expected by forward() when reshaping the flat input
        self.input_channels = input_channel

        # shared convolutional backbone:
        self._encoder = Encoder(input_channel,
                                num_hiddens,
                                num_residual_layers,
                                num_residual_hiddens)

        # 1x1 conv heads for the latent Gaussian parameters:
        self._encoder_z_mu = nn.Conv1d(in_channels=num_hiddens,
                                       out_channels=embedding_dim,
                                       kernel_size=1,
                                       stride=1)
        self._encoder_z_log_sd = nn.Conv1d(in_channels=num_hiddens,
                                           out_channels=embedding_dim,
                                           kernel_size=1,
                                           stride=1)

    def forward(self, x):
        # reshape the input to (batch, channels, POINTS); POINTS is a
        # module-level constant defined elsewhere in this file
        x = x.reshape(-1, self.input_channels, POINTS)
        features = self._encoder(x)
        # mean and log standard deviation of q(z|x):
        z_mu = self._encoder_z_mu(features)
        z_log_sd = self._encoder_z_log_sd(features)
        return z_mu, z_log_sd
|
| 244 |
+
|
| 245 |
+
# our proposed model:
|
| 246 |
+
class S3Net(nn.Module):
    """Semantic scan segmentation VAE.

    Encodes (scan, intensity, angle-of-incidence) lidar streams into a
    Gaussian latent, samples it with the reparameterization trick, and
    decodes per-point semantic class scores.
    """

    def __init__(self, input_channels, output_channels):
        super(S3Net, self).__init__()
        # parameters:
        self.input_channels = input_channels
        # flattened latent size; must equal embedding_dim * (encoder output
        # length). NOTE(review): 270 implies POINTS/4 == 270, i.e. POINTS ==
        # 1080 — confirm against the POINTS constant defined elsewhere.
        self.latent_dim = 270
        self.output_channels = output_channels

        # Constants
        num_hiddens = 64 #128
        num_residual_hiddens = 32 #64
        num_residual_layers = 2
        embedding_dim = 1 #2

        # prediction encoder:
        self._encoder = VAE_Encoder(self.input_channels,
                                    num_hiddens,
                                    num_residual_layers,
                                    num_residual_hiddens,
                                    embedding_dim)

        # decoder: 1x1 transposed conv lifts the latent back to num_hiddens
        # channels before the upsampling Decoder
        self._decoder_z_mu = nn.ConvTranspose1d(in_channels=embedding_dim,
                                                out_channels=num_hiddens,
                                                kernel_size=1,
                                                stride=1)
        self._decoder = Decoder(self.output_channels,
                                num_hiddens,
                                num_residual_layers,
                                num_residual_hiddens)

        # per-point class probabilities over the channel axis
        self.softmax = nn.Softmax(dim=1)

    def vae_reparameterize(self, z_mu, z_log_sd):
        """
        :param mu: mean from the encoder's latent space
        :param log_sd: log standard deviation from the encoder's latent space
        :output: reparameterized latent variable z, Monte carlo KL divergence

        Note: z is drawn with rsample(), so this method is stochastic and the
        sample stays differentiable w.r.t. z_mu / z_log_sd.
        """
        # reshape to (batch, latent_dim, 1):
        z_mu = z_mu.reshape(-1, self.latent_dim, 1)
        z_log_sd = z_log_sd.reshape(-1, self.latent_dim, 1)
        # define the z probabilities (in this case Normal for both)
        # p(z): N(z|0,I)
        pz = torch.distributions.Normal(loc=torch.zeros_like(z_mu), scale=torch.ones_like(z_log_sd))
        # q(z|x,phi): N(z|mu, z_var)
        qz_x = torch.distributions.Normal(loc=z_mu, scale=torch.exp(z_log_sd))

        # repameterization trick: z = z_mu + xi (*) z_log_var, xi~N(xi|0,I)
        z = qz_x.rsample()
        # Monte Carlo KL divergence: MCKL(p(z)||q(z|x,phi)) = log(p(z)) - log(q(z|x,phi))
        # sum over weight dim, leaves the batch dim
        kl_divergence = (pz.log_prob(z) - qz_x.log_prob(z)).sum(dim=1)
        # negated so it is minimized as a loss term
        kl_loss = -kl_divergence.mean()

        return z, kl_loss

    def forward(self, x_s, x_i, x_a):
        """
        Forward pass `input_img` through the network

        x_s / x_i / x_a: scan, intensity, and angle-of-incidence streams,
        each reshaped to one channel of POINTS samples (POINTS is a
        module-level constant defined elsewhere in this file).
        Returns (softmax probabilities, raw logits, KL loss).
        """
        # reconstruction:
        # encode:
        # input reshape to (batch, 1, POINTS) per stream:
        x_s = x_s.reshape(-1, 1, POINTS)
        x_i = x_i.reshape(-1, 1, POINTS)
        x_a = x_a.reshape(-1, 1, POINTS)
        # concatenate along channel axis
        x = torch.cat([x_s, x_i, x_a], dim=1)

        # encode:
        z_mu, z_log_sd = self._encoder(x)

        # get the latent vector through reparameterization (stochastic):
        z, kl_loss = self.vae_reparameterize(z_mu, z_log_sd)

        # decode:
        # reshape to (batch, 1 channel, 270):
        z = z.reshape(-1, 1, 270)
        x_d = self._decoder_z_mu(z)
        semantic_channels = self._decoder(x_d)

        # semantic grid: 10 channels of per-point class probabilities
        semantic_scan = self.softmax(semantic_channels)

        return semantic_scan, semantic_channels, kl_loss
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
#------------------------------------------------------------------------------
|
| 337 |
+
#
|
| 338 |
+
# ResNet blocks
|
| 339 |
+
#
|
| 340 |
+
#------------------------------------------------------------------------------
|
| 341 |
+
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
|
| 342 |
+
"""3x3 convolution with padding"""
|
| 343 |
+
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
|
| 344 |
+
padding=dilation, groups=groups, bias=False, dilation=dilation)
|
| 345 |
+
|
| 346 |
+
def conv1x1(in_planes, out_planes, stride=1):
|
| 347 |
+
"""1x1 convolution"""
|
| 348 |
+
return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
|
| 349 |
+
|
| 350 |
+
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand + shortcut.

    Torchvision places the stride on the 3x3 conv (self.conv2) whereas the
    original paper ("Deep residual learning for image recognition",
    https://arxiv.org/abs/1512.03385) strides the first 1x1 conv.  This
    variant is known as ResNet V1.5 and improves accuracy per
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
    """

    # Output channels = planes * expansion; this network halves the usual
    # factor of 4 to keep the model small.
    expansion = 2  # 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes * (base_width / 64.)) * groups
        # Both self.conv2 and the optional self.downsample branch shrink the
        # input spatially whenever stride != 1.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, planes * self.expansion)
        self.bn3 = norm_layer(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Residual forward pass: relu(F(x) + shortcut(x))."""
        # Shortcut path: identity, or a projection when shape changes.
        shortcut = x if self.downsample is None else self.downsample(x)

        # Main path: reduce -> transform -> expand, BN after every conv.
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
|
| 397 |
+
#
|
| 398 |
+
# end of ResNet blocks
|
| 399 |
+
|
| 400 |
+
|
| 401 |
+
#------------------------------------------------------------------------------
|
| 402 |
+
#
|
| 403 |
+
# the model is defined here
|
| 404 |
+
#
|
| 405 |
+
#------------------------------------------------------------------------------
|
| 406 |
+
|
| 407 |
+
# define the PyTorch MLP model
|
| 408 |
+
#
|
| 409 |
+
class SemanticCNN(nn.Module):
    """ResNet-style navigation policy that fuses a lidar occupancy grid with
    a semantic grid (each 80x80) and a 2-D goal vector into `num_classes`
    outputs (presumably a velocity command -- confirm against the caller).

    The backbone is three Bottleneck stages plus two hand-wired long-range
    residual connections (downsample2/downsample3) that skip over stages.
    NOTE(review): attribute names and construction order must stay as-is --
    they define the checkpoint state_dict keys and the RNG init order.
    """

    # function: init
    #
    # arguments: block  - residual block class (e.g. Bottleneck)
    #            layers - list with the number of blocks per stage
    #            cnn_in_channels - input channels (scan + semantics = 2)
    #            num_classes - size of the output vector
    #
    # return: none
    #
    def __init__(self, block, layers, cnn_in_channels=2, num_classes=2, zero_init_residual=True,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):

        # inherit the superclass properties/methods
        #
        super(SemanticCNN, self).__init__()
        # define the model
        #
        ################## ped_pos net model: ###################
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        # Stem: stride-1 3x3 conv keeps the 80x80 resolution.
        self.conv1 = nn.Conv2d(cnn_in_channels, self.inplanes, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        # stride-1 maxpool: smooths without downsampling
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])

        # Extra bottleneck applied after layer2 (256 -> 256 channels).
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1,1), padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(256)
        )
        # Projection for the layer1 -> conv2_2 skip (128 -> 256, /2 spatially).
        self.downsample2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(2,2), padding=(0, 0)),
            nn.BatchNorm2d(256)
        )
        self.relu2 = nn.ReLU(inplace=True)

        # Extra bottleneck applied after layer3 (512 -> 512 channels).
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1,1), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(512)
        )
        # Projection for the stem -> conv3_2 skip (64 -> 512, /4 spatially).
        self.downsample3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=512, kernel_size=(1, 1), stride=(4,4), padding=(0, 0)),
            nn.BatchNorm2d(512)
        )
        self.relu3 = nn.ReLU(inplace=True)

        # self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
        #                     dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # +2 for the concatenated (x, y) goal vector.
        self.fc = nn.Linear(256 * block.expansion + 2, num_classes)

        # Standard initialization: Kaiming for convs, unit/zero for norms,
        # Xavier for linear layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d): # add by xzt
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Stack `blocks` residual blocks; the first one may downsample."""
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # trade stride for dilation to keep the spatial resolution
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # projection shortcut when shape changes
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, scan, semantics, goal):
        """Fuse scan + semantic grids through the CNN, append the goal,
        and map everything through the final linear layer."""
        ###### Start of fusion net ######
        # Stack the two 80x80 grids as a 2-channel image.
        scan_in = scan.reshape(-1,1,80,80)
        semantics_in = semantics.reshape(-1,1,80,80)
        fusion_in = torch.cat((scan_in, semantics_in), dim=1)

        # See note [TorchScript super()]
        x = self.conv1(fusion_in)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # long-range skip from the stem output (64 ch) to after conv3_2
        identity3 = self.downsample3(x)

        x = self.layer1(x)

        # skip from layer1 output (128 ch) to after conv2_2
        identity2 = self.downsample2(x)

        x = self.layer2(x)

        x = self.conv2_2(x)
        x += identity2
        x = self.relu2(x)


        x = self.layer3(x)
        # x = self.layer4(x)

        x = self.conv3_2(x)
        x += identity3
        x = self.relu3(x)

        # global average pool -> (batch, 512) feature vector
        x = self.avgpool(x)
        fusion_out = torch.flatten(x, 1)
        ###### End of fusion net ######

        ###### Start of goal net #######
        # goal is passed through unchanged, just flattened to (batch, 2)
        goal_in = goal.reshape(-1,2)
        goal_out = torch.flatten(goal_in, 1)
        ###### End of goal net #######
        # Combine
        fc_in = torch.cat((fusion_out, goal_out), dim=1)
        x = self.fc(fc_in)

        return x

    def forward(self, scan, semantics, goal):
        """Public entry point; see _forward_impl."""
        return self._forward_impl(scan, semantics, goal)
    #
    # end of method
    #
# end of class
|
| 590 |
+
#
|
| 591 |
+
# end of method
|
| 592 |
+
#
|
| 593 |
+
# end of class
|
| 594 |
+
|
| 595 |
+
|
| 596 |
+
# define the PyTorch MLP model
|
| 597 |
+
#
|
| 598 |
+
class CNN(nn.Module):
    """Scan-only baseline of the navigation policy: identical backbone to
    SemanticCNN but the semantic grid is dropped and the stem receives a
    single-channel 80x80 scan (cnn_in_channels should be 1 for this use --
    the default of 2 is inherited; confirm against the caller).

    NOTE(review): attribute names and construction order must stay as-is --
    they define the checkpoint state_dict keys and the RNG init order.
    """

    # function: init
    #
    # arguments: block  - residual block class (e.g. Bottleneck)
    #            layers - list with the number of blocks per stage
    #            cnn_in_channels - input channels for the stem conv
    #            num_classes - size of the output vector
    #
    # return: none
    #
    def __init__(self, block, layers, cnn_in_channels=2, num_classes=2, zero_init_residual=True,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):

        # inherit the superclass properties/methods
        #
        super(CNN, self).__init__()
        # define the model
        #
        ################## ped_pos net model: ###################
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        # Stem: stride-1 3x3 conv keeps the 80x80 resolution.
        self.conv1 = nn.Conv2d(cnn_in_channels, self.inplanes, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        # stride-1 maxpool: smooths without downsampling
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])

        # Extra bottleneck applied after layer2 (256 -> 256 channels).
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1,1), padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(256)
        )
        # Projection for the layer1 -> conv2_2 skip (128 -> 256, /2 spatially).
        self.downsample2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(2,2), padding=(0, 0)),
            nn.BatchNorm2d(256)
        )
        self.relu2 = nn.ReLU(inplace=True)

        # Extra bottleneck applied after layer3 (512 -> 512 channels).
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1,1), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(512)
        )
        # Projection for the stem -> conv3_2 skip (64 -> 512, /4 spatially).
        self.downsample3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=512, kernel_size=(1, 1), stride=(4,4), padding=(0, 0)),
            nn.BatchNorm2d(512)
        )
        self.relu3 = nn.ReLU(inplace=True)

        # self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
        #                     dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # +2 for the concatenated (x, y) goal vector.
        self.fc = nn.Linear(256 * block.expansion + 2, num_classes)

        # Standard initialization: Kaiming for convs, unit/zero for norms,
        # Xavier for linear layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d): # add by xzt
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Stack `blocks` residual blocks; the first one may downsample."""
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # trade stride for dilation to keep the spatial resolution
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # projection shortcut when shape changes
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, scan, goal):
        """Run the scan grid through the CNN, append the goal vector,
        and map everything through the final linear layer."""
        ###### Start of fusion net ######
        scan_in = scan.reshape(-1,1,80,80)
        fusion_in = scan_in #torch.cat((scan_in, semantics_in), dim=1)

        # See note [TorchScript super()]
        x = self.conv1(fusion_in)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # long-range skip from the stem output (64 ch) to after conv3_2
        identity3 = self.downsample3(x)

        x = self.layer1(x)

        # skip from layer1 output (128 ch) to after conv2_2
        identity2 = self.downsample2(x)

        x = self.layer2(x)

        x = self.conv2_2(x)
        x += identity2
        x = self.relu2(x)


        x = self.layer3(x)
        # x = self.layer4(x)

        x = self.conv3_2(x)
        x += identity3
        x = self.relu3(x)

        # global average pool -> (batch, 512) feature vector
        x = self.avgpool(x)
        fusion_out = torch.flatten(x, 1)
        ###### End of fusion net ######

        ###### Start of goal net #######
        # goal is passed through unchanged, just flattened to (batch, 2)
        goal_in = goal.reshape(-1,2)
        goal_out = torch.flatten(goal_in, 1)
        ###### End of goal net #######
        # Combine
        fc_in = torch.cat((fusion_out, goal_out), dim=1)
        x = self.fc(fc_in)

        return x

    def forward(self, scan, goal):
        """Public entry point; see _forward_impl."""
        return self._forward_impl(scan, goal)
    #
    # end of method
    #
# end of class
#
# end of file
|
| 778 |
+
#
|
| 779 |
+
# end of method
|
| 780 |
+
#
|
| 781 |
+
# end of class
|
| 782 |
+
#
|
| 783 |
+
# end of file
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/goal_visualize.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
"""Republish the current move_base goal as an RViz sphere marker."""

import rospy
from std_msgs.msg import Header
from geometry_msgs.msg import Point, PoseStamped
from visualization_msgs.msg import Marker
from visualization_msgs.msg import MarkerArray


def goal_callback(goal_msg):
    """Convert the latest goal pose (PoseStamped) into a red translucent
    sphere Marker in the map frame and publish it on `goal_markers`."""
    # initialize header and color
    h = Header()
    h.frame_id = "map"
    h.stamp = rospy.Time.now()

    # initialize goal marker message
    goal_marker = Marker()
    goal_marker.header = h
    goal_marker.type = Marker.SPHERE
    goal_marker.action = Marker.ADD
    goal_marker.pose = goal_msg.pose
    goal_marker.scale.x = 1.8
    goal_marker.scale.y = 1.8
    goal_marker.scale.z = 1.8
    goal_marker.color.r = 1.0
    goal_marker.color.g = 0.0
    goal_marker.color.b = 0.0
    goal_marker.color.a = 0.5 # set transparency

    goal_vis_pub.publish(goal_marker)


if __name__ == '__main__':
    try:
        rospy.init_node('goal_vis')
        # BUGFIX: create the publisher BEFORE subscribing.  The original code
        # registered the subscriber first, so a goal arriving immediately
        # could invoke goal_callback while the global `goal_vis_pub` was
        # still undefined, raising a NameError in the callback thread.
        goal_vis_pub = rospy.Publisher('goal_markers', Marker, queue_size=1, latch=True)
        goal_sub = rospy.Subscriber("/move_base/current_goal", PoseStamped, goal_callback)
        # spin() simply keeps python from exiting until this node is stopped
        rospy.spin()
    except rospy.ROSInterruptException:
        pass
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/model/s3_net_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86ffcba0092e8e20d80fc02e5e01bb675c60d0c897d8830305ecc5b8b20b6dbb
|
| 3 |
+
size 741507
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/model/semantic_cnn_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ed1ba15e9c9df1c7c0b5f1f45545f57ad30134d5bb7673e77ee7df106358a60
|
| 3 |
+
size 28985757
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/pure_pursuit.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
|
| 3 |
+
import rospy
|
| 4 |
+
from nav_msgs.msg import Path
|
| 5 |
+
from geometry_msgs.msg import Twist, PoseStamped
|
| 6 |
+
import tf
|
| 7 |
+
from scipy.optimize import linprog
|
| 8 |
+
from geometry_msgs.msg import Point
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
import threading
|
| 13 |
+
|
| 14 |
+
class PurePursuit:
|
| 15 |
+
|
| 16 |
+
# Constructor
|
| 17 |
+
def __init__(self):
    """Set up pure-pursuit state and wire the ROS pub/sub interface.

    Subscribes to the global plan from move_base's NavfnROS planner and
    publishes the lookahead point (`cnn_goal`) and final goal
    (`final_goal`) as geometry_msgs/Point messages.
    """
    # initialize parameters
    # lookahead distance in meters (hard-coded; the commented call shows
    # it was once a ROS parameter)
    self.lookahead = 2 #rospy.get_param('~lookahead', 5.0)
    # control-loop rate in Hz
    self.rate = 20 #rospy.get_param('~rate', 20.)
    # rospy.Timer handle; stays None until the first path arrives
    self.timer = None
    self.path = None # store the path to the goal
    self.lock = threading.Lock() # lock to keep data thread safe

    # Initialize ROS objects
    #self.goal_sub = rospy.Subscriber("/move_base/current_goal", PoseStamped, self.goal_callback)
    self.path_sub = rospy.Subscriber('move_base/NavfnROS/plan', Path, self.path_callback)
    self.tf_listener = tf.TransformListener()
    #self.cmd_vel_pub = rospy.Publisher('cmd_vel', Twist, queue_size=10)
    self.cnn_goal_pub = rospy.Publisher('cnn_goal', Point, queue_size=1)#, latch=True)
    self.final_goal_pub = rospy.Publisher('final_goal', Point, queue_size=1)#, latch=True)
|
| 32 |
+
|
| 33 |
+
# Callback function for the path subscriber
|
| 34 |
+
def path_callback(self, msg):
    """Store the newest global plan; start the control timer on first path.

    Runs in the subscriber's callback thread, so self.path is written under
    the lock that timer_callback also takes before reading it.
    """
    rospy.logdebug('PurePursuit: Got path')
    # IMPROVED: `with` guarantees the lock is released even if the
    # assignment raises; the manual acquire()/release() pair could leave
    # the lock held forever and deadlock the timer thread.
    with self.lock:
        self.path = msg  # store the path in the class member
    # start the timer if this is the first path received
    if self.timer is None:
        self.start()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# Start the timer that calculates command velocities
|
| 46 |
+
def start(self):
    """Start the periodic controller timer at `self.rate` Hz.

    Each tick invokes timer_callback, which computes and publishes the
    current pure-pursuit goal point.
    """
    # initialize timer for controller update
    self.timer = rospy.Timer(rospy.Duration(1./self.rate), self.timer_callback)
|
| 49 |
+
|
| 50 |
+
# Get the current pose of the robot from the tf tree
|
| 51 |
+
def get_current_pose(self):
    """Look up the robot pose (map -> base_link) from the tf tree.

    Returns:
        (x, theta) where x is a 2-element numpy array [x, y] and theta is
        the yaw in radians.  On tf failure returns ([nan, nan], nan) so
        callers can detect the invalid pose with np.isnan.
    """
    trans = rot = None
    # look up the current pose of the base_link using the tf tree
    try:
        (trans,rot) = self.tf_listener.lookupTransform('/map', '/base_link', rospy.Time(0))
    except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
        rospy.logwarn('Could not get robot pose')
        # NaNs signal "pose unavailable" to the caller
        return (np.array([np.nan, np.nan]), np.nan)
    x = np.array([trans[0], trans[1]])
    # only yaw matters for a planar robot; roll/pitch are discarded
    (roll, pitch, theta) = tf.transformations.euler_from_quaternion(rot)
    rospy.logdebug("x = {}, y = {}, theta = {}".format(x[0], x[1], theta))

    return (x, theta)
|
| 64 |
+
|
| 65 |
+
# Find the closest point on the current path to the point x
|
| 66 |
+
# Inputs:
|
| 67 |
+
# x = numpy array with 2 elements (x and y position of robot)
|
| 68 |
+
# seg = optional argument that selects which segment of the path to compute the closest point on
|
| 69 |
+
# Outputs:
|
| 70 |
+
# pt_min = closest point on the path to x
|
| 71 |
+
# dist_min = distance from the closest point to x
|
| 72 |
+
# seg_min = index of closest segment to x
|
| 73 |
+
def find_closest_point(self, x, seg=-1):
|
| 74 |
+
# initialize return values
|
| 75 |
+
pt_min = np.array([np.nan, np.nan])
|
| 76 |
+
dist_min = np.inf
|
| 77 |
+
seg_min = -1
|
| 78 |
+
|
| 79 |
+
# check if path has been received yet
|
| 80 |
+
if self.path is None:
|
| 81 |
+
rospy.logwarn('Pure Pursuit: No path received yet')
|
| 82 |
+
return (pt_min, dist_min, seg_min)
|
| 83 |
+
|
| 84 |
+
##### YOUR CODE STARTS HERE #####
|
| 85 |
+
if seg == -1:
|
| 86 |
+
# find closest point on entire pathd
|
| 87 |
+
for i in range(len(self.path.poses) - 1): # gets total number of segments and iterates over them all
|
| 88 |
+
(pt, dist, s) = self.find_closest_point(x, i) # find the closest point to the robot on segment i
|
| 89 |
+
if dist < dist_min: # if new point is closer than the previous best, keep it as the new best point
|
| 90 |
+
pt_min = pt
|
| 91 |
+
dist_min = dist
|
| 92 |
+
seg_min = s
|
| 93 |
+
else:
|
| 94 |
+
# find closest point on segment seg
|
| 95 |
+
# extract the start and end of segment seg from the path
|
| 96 |
+
p_start = np.array([self.path.poses[seg].pose.position.x, self.path.poses[seg].pose.position.y])
|
| 97 |
+
p_end = np.array([self.path.poses[seg+1].pose.position.x, self.path.poses[seg+1].pose.position.y])
|
| 98 |
+
|
| 99 |
+
# calculate the unit direction vector and segment length
|
| 100 |
+
v = p_end - p_start
|
| 101 |
+
length_seg = np.linalg.norm(v)
|
| 102 |
+
v = v / length_seg
|
| 103 |
+
|
| 104 |
+
# calculate projected distance
|
| 105 |
+
dist_projected = np.dot(x - p_start, v)
|
| 106 |
+
|
| 107 |
+
# find closest point on the line segment to x
|
| 108 |
+
if dist_projected < 0.:
|
| 109 |
+
pt_min = p_start
|
| 110 |
+
elif dist_projected > length_seg:
|
| 111 |
+
pt_min = p_end
|
| 112 |
+
else:
|
| 113 |
+
pt_min = p_start + dist_projected * v
|
| 114 |
+
|
| 115 |
+
# calculate other outputs
|
| 116 |
+
dist_min = np.linalg.norm(pt_min - x)
|
| 117 |
+
seg_min = seg
|
| 118 |
+
|
| 119 |
+
##### YOUR CODE ENDS HERE #####
|
| 120 |
+
return (pt_min, dist_min, seg_min)
|
| 121 |
+
|
| 122 |
+
# Find the goal point to drive the robot towards
|
| 123 |
+
# Inputs:
|
| 124 |
+
# x = numpy array with 2 elements (x and y position of robot)
|
| 125 |
+
# pt, dist, seg = outputs of find_closest_point
|
| 126 |
+
# Outputs:
|
| 127 |
+
# goal = numpy array with 2 elements (x and y position of goal)
|
| 128 |
+
def find_goal(self, x, pt, dist, seg):
    """Find the pure-pursuit goal point to drive the robot towards.

    Inputs:
      x             = numpy array with 2 elements (robot x, y position)
      pt, dist, seg = outputs of find_closest_point for x
    Outputs:
      (goal, end_goal_pos, end_goal_rot):
        goal         = numpy array with 2 elements (goal x, y), on the
                       lookahead circle (or the closest path point when the
                       robot is farther than `lookahead` from the path)
        end_goal_pos = [x, y] of the last pose of the path
        end_goal_rot = [x, y, z, w] quaternion of the last pose of the path
    """
    goal = None
    end_goal_pos = None
    end_goal_rot = None
    if dist > self.lookahead:
        # if further than lookahead from the path, drive towards the path
        goal = pt
    else:
        ##### YOUR CODE STARTS HERE #####
        # seg_max is the index of the last segment of the path
        seg_max = len(self.path.poses) - 2
        # extract the end of segment seg from the path
        p_end = np.array([self.path.poses[seg+1].pose.position.x, self.path.poses[seg+1].pose.position.y])
        # calculate the distance from x to p_end:
        dist_end = np.linalg.norm(x - p_end)

        # start from the nearest segment and iterate forward until you find either the last segment or a segment that leaves the lookahead circle
        while(dist_end < self.lookahead and seg < seg_max):
            seg = seg + 1
            # extract the end of segment seg from the path
            p_end = np.array([self.path.poses[seg+1].pose.position.x, self.path.poses[seg+1].pose.position.y])
            # calculate the distance from x to p_end:
            dist_end = np.linalg.norm(x - p_end)

        # if searched the whole path, set the goal as the end of the path
        if(dist_end < self.lookahead):
            pt = np.array([self.path.poses[seg_max+1].pose.position.x, self.path.poses[seg_max+1].pose.position.y])
        # if found a segment that leaves the circle, find the intersection with the circle
        else:
            # find the closest point:
            (pt, dist, seg) = self.find_closest_point(x, seg)
            # extract the start and end of segment seg from the path
            p_start = np.array([self.path.poses[seg].pose.position.x, self.path.poses[seg].pose.position.y])
            p_end = np.array([self.path.poses[seg+1].pose.position.x, self.path.poses[seg+1].pose.position.y])
            # calculate the unit direction vector and segment length
            v = p_end - p_start
            length_seg = np.linalg.norm(v)
            v = v / length_seg
            # decompose (x - pt) into components along and perpendicular
            # to the segment, then step along v to the point where the
            # segment crosses the lookahead circle centered on x
            dist_projected_x = np.dot(x - pt, v)
            dist_projected_y = np.linalg.norm(np.cross(x - pt, v))
            pt = pt + (np.sqrt(self.lookahead**2 - dist_projected_y**2) + dist_projected_x)*v

        goal = pt
        ##### YOUR CODE ENDS HERE #####
    # final pose of the path (position + orientation quaternion)
    end_goal_pos = [self.path.poses[-1].pose.position.x, self.path.poses[-1].pose.position.y]
    end_goal_rot = [self.path.poses[-1].pose.orientation.x, self.path.poses[-1].pose.orientation.y, \
                    self.path.poses[-1].pose.orientation.z, self.path.poses[-1].pose.orientation.w,]

    return (goal, end_goal_pos, end_goal_rot)
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# function that runs every time the timer finishes to ensure that velocity commands are sent regularly
|
| 180 |
+
def timer_callback(self, event):
|
| 181 |
+
# lock the path to ensure it is not updated during processing
|
| 182 |
+
self.lock.acquire()
|
| 183 |
+
try:
|
| 184 |
+
# get current pose
|
| 185 |
+
# (x, theta) = self.get_current_pose()
|
| 186 |
+
trans = rot = None
|
| 187 |
+
# look up the current pose of the base_link using the tf tree
|
| 188 |
+
try:
|
| 189 |
+
(trans,rot) = self.tf_listener.lookupTransform('map', 'base_link', rospy.Time(0))
|
| 190 |
+
except (tf.LookupException, tf.ConnectivityException, tf.ExtrapolationException):
|
| 191 |
+
rospy.logwarn('Could not get robot pose')
|
| 192 |
+
return (np.array([np.nan, np.nan]), np.nan)
|
| 193 |
+
x = np.array([trans[0], trans[1]])
|
| 194 |
+
(roll, pitch, theta) = tf.transformations.euler_from_quaternion(rot)
|
| 195 |
+
rospy.logdebug("x = {}, y = {}, theta = {}".format(x[0], x[1], theta))
|
| 196 |
+
if np.isnan(x[0]): # ensure data is valid
|
| 197 |
+
return
|
| 198 |
+
|
| 199 |
+
# find the closest point
|
| 200 |
+
(pt, dist, seg) = self.find_closest_point(x)
|
| 201 |
+
if np.isnan(pt).any(): # ensure data is valid
|
| 202 |
+
return
|
| 203 |
+
|
| 204 |
+
# find the goal point
|
| 205 |
+
(goal, end_goal_pos, end_goal_rot) = self.find_goal(x, pt, dist, seg)
|
| 206 |
+
if goal is None or end_goal_pos is None: # ensure data is valid
|
| 207 |
+
return
|
| 208 |
+
finally:
|
| 209 |
+
# ensure the lock is released
|
| 210 |
+
self.lock.release()
|
| 211 |
+
|
| 212 |
+
# transform goal to local coordinates
|
| 213 |
+
##### YOUR CODE STARTS HERE #####
|
| 214 |
+
# homogeneous transformation matrix:
|
| 215 |
+
map_T_robot = np.array([[np.cos(theta), -np.sin(theta), x[0]],
|
| 216 |
+
[np.sin(theta), np.cos(theta), x[1]],
|
| 217 |
+
[0, 0, 1]])
|
| 218 |
+
|
| 219 |
+
goal = np.matmul(np.linalg.inv(map_T_robot), np.array([[goal[0]],[goal[1]],[1]])) #np.dot(np.linalg.inv(map_T_robot), np.array([goal[0], goal[1],1])) #
|
| 220 |
+
goal = goal[0:2]
|
| 221 |
+
##### YOUR CODE ENDS HERE #####
|
| 222 |
+
|
| 223 |
+
# final relative goal:
|
| 224 |
+
relative_goal = np.matmul(np.linalg.inv(map_T_robot), np.array([[end_goal_pos[0]],[end_goal_pos[1]],[1]]))
|
| 225 |
+
# Compute the difference to the goal orientation
|
| 226 |
+
orientation_to_target = tf.transformations.quaternion_multiply(end_goal_rot, \
|
| 227 |
+
tf.transformations.quaternion_inverse(rot))
|
| 228 |
+
yaw = tf.transformations.euler_from_quaternion(orientation_to_target)[2]
|
| 229 |
+
|
| 230 |
+
# publish the cnn goal:
|
| 231 |
+
cnn_goal = Point()
|
| 232 |
+
cnn_goal.x = goal[0]
|
| 233 |
+
cnn_goal.y = goal[1]
|
| 234 |
+
cnn_goal.z = 0
|
| 235 |
+
if not np.isnan(cnn_goal.x) and not np.isnan(cnn_goal.y): # ensure data is valid
|
| 236 |
+
self.cnn_goal_pub.publish(cnn_goal)
|
| 237 |
+
|
| 238 |
+
# publish the final goal:
|
| 239 |
+
final_goal = Point()
|
| 240 |
+
final_goal.x = relative_goal[0]
|
| 241 |
+
final_goal.y = relative_goal[1]
|
| 242 |
+
final_goal.z = yaw
|
| 243 |
+
if not np.isnan(final_goal.x) and not np.isnan(final_goal.y): # ensure data is valid
|
| 244 |
+
self.final_goal_pub.publish(final_goal)
|
| 245 |
+
|
| 246 |
+
if __name__ == '__main__':
    try:
        # start the pure-pursuit ROS node
        rospy.init_node('pure_pursuit')
        PurePursuit()
        # spin() simply keeps python from exiting until this node is stopped
        rospy.spin()
    except rospy.ROSInterruptException:
        # normal shutdown (e.g. Ctrl-C) -- exit quietly
        pass
|
| 254 |
+
|
ros_deployment_ws/src/semantic_cnn_nav/semantic_cnn/src/semantic_cnn_nav_inference.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
#
|
| 3 |
+
# file: $ISIP_EXP/tuh_dpath/exp_0074/scripts/decode.py
|
| 4 |
+
#
|
| 5 |
+
# revision history:
|
| 6 |
+
# 20190925 (TE): first version
|
| 7 |
+
#
|
| 8 |
+
# usage:
|
| 9 |
+
# python decode.py odir mfile data
|
| 10 |
+
#
|
| 11 |
+
# arguments:
|
| 12 |
+
# odir: the directory where the hypotheses will be stored
|
| 13 |
+
# mfile: input model file
|
| 14 |
+
# data: the input data list to be decoded
|
| 15 |
+
#
|
| 16 |
+
# This script runs the semantic CNN navigation policy for inference.
|
| 17 |
+
#------------------------------------------------------------------------------
|
| 18 |
+
|
| 19 |
+
# import modules
|
| 20 |
+
#
|
| 21 |
+
import sys
|
| 22 |
+
import os
|
| 23 |
+
|
| 24 |
+
# ros:
|
| 25 |
+
import rospy
|
| 26 |
+
#import tf
|
| 27 |
+
import numpy as np
|
| 28 |
+
import torch
|
| 29 |
+
|
| 30 |
+
# custom define messages:
|
| 31 |
+
from sensor_msgs.msg import LaserScan
|
| 32 |
+
from cnn_msgs.msg import CNN_data
|
| 33 |
+
from geometry_msgs.msg import Twist
|
| 34 |
+
from geometry_msgs.msg import Point
|
| 35 |
+
from cnn_model import *
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
#-----------------------------------------------------------------------------
|
| 39 |
+
#
|
| 40 |
+
# global variables are listed here
|
| 41 |
+
#
|
| 42 |
+
#-----------------------------------------------------------------------------
|
| 43 |
+
# number of lidar rays per scan on the deployed robot (training used 1081)
POINTS = 1080 #1081
# number of historical scans carried in each /cnn_data message
SEQ_LEN = 10
# side length of the square map fed to the CNN
IMG_SIZE = 80
|
| 46 |
+
#------------------------------------------------------------------------------
|
| 47 |
+
#
|
| 48 |
+
# the main program starts here
|
| 49 |
+
#
|
| 50 |
+
#------------------------------------------------------------------------------
|
| 51 |
+
class SemanticCnnInference:
    """ROS node body for semantic-CNN navigation inference.

    Subscribes to the aggregated sensor message on /cnn_data, runs S3-Net to
    obtain a semantic label per lidar ray, builds scan/semantic history maps,
    feeds them with the sub-goal into the semantic CNN policy, and publishes
    the resulting velocity command.
    """

    # Constructor
    def __init__(self):
        # rolling sensor state, refreshed on every /cnn_data message
        self.scan_his = []        # flattened history of SEQ_LEN scans
        self.intensity_his = []   # flattened history of SEQ_LEN intensity arrays
        self.scan = []            # most recent scan
        self.goal = []            # sub-goal in the robot frame
        self.vx = 0
        self.wz = 0

        # dataset statistics used for standardization:
        # scan, sub_goal, intensity, angle of incidence:
        # [[4.518406, 8.2914915], [0.30655652, 0.5378557], [3081.8167, 1529.4413], [0.5959513, 0.4783924]]
        self.s_mu = 4.518406
        self.s_std = 8.2914915
        self.g_mu = 0.30655652
        self.g_std = 0.5378557
        self.i_mu = 3081.8167
        self.i_std = 1529.4413
        self.a_mu = 0.5959513
        self.a_std = 0.4783924

        # device and per-ray angular increment over the 270 degree FOV
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.alpha = np.ones(POINTS - 1) * ((270 * np.pi / 180) / (POINTS - 1))

        # S3-Net: semantic segmentation of the raw scan
        self.s3_net_model = S3Net(input_channels=NUM_INPUT_CHANNELS,
                                  output_channels=NUM_OUTPUT_CHANNELS)
        # moves the model to device (cpu in our case so no change):
        self.s3_net_model.to(self.device)
        self.s3_net_model.eval()
        # load the weights
        s3_net_mdl_path = rospy.get_param('~s3_net_model_file', "./model/s3_net_model.pth")
        s3_net_checkpoint = torch.load(s3_net_mdl_path, map_location=self.device)
        self.s3_net_model.load_state_dict(s3_net_checkpoint['model'])

        # semantic cnn: the velocity policy network
        self.cnn_model = SemanticCNN(Bottleneck, [2, 1, 1], cnn_in_channels=2)
        self.cnn_model.to(self.device)
        self.cnn_model.eval()
        # load the weights
        cnn_mdl_path = rospy.get_param('~semantic_cnn_model_file', "./model/semantic_cnn_model.pth")
        checkpoint = torch.load(cnn_mdl_path, map_location=self.device)
        self.cnn_model.load_state_dict(checkpoint['model'])

        print("Finish loading model.")

        # initialize ROS objects
        self.cnn_data_sub = rospy.Subscriber("/cnn_data", CNN_data, self.cnn_data_callback,
                                             queue_size=1, buff_size=2**24)
        self.cmd_vel_pub = rospy.Publisher('/navigation_velocity_smoother/raw_cmd_vel',
                                           Twist, queue_size=1, latch=False)

    # Callback function for the cnn_data subscriber
    def cnn_data_callback(self, cnn_data_msg):
        """Run one inference step per incoming CNN_data message and publish
        the resulting velocity command (if valid)."""
        self.scan_his = np.array(cnn_data_msg.scan_his)
        self.intensity_his = np.array(cnn_data_msg.intensity_his)
        self.scan = np.array(cnn_data_msg.scan)
        self.goal = np.array(cnn_data_msg.goal)

        # minimum obstacle distance over the central portion of the scan
        scan_cur = np.array(self.scan[360-40:-360+40])
        scan_cur = scan_cur[scan_cur != 0]
        if scan_cur.size != 0:
            min_scan_dist = np.amin(scan_cur)
        else:
            min_scan_dist = 10

        cmd_vel = Twist()
        if np.linalg.norm(self.goal) <= 0.9:  # goal reached: stop
            cmd_vel.linear.x = 0
            cmd_vel.angular.z = 0
        elif min_scan_dist <= 0.6:  # obstacle too close: rotate in place
            cmd_vel.linear.x = 0
            cmd_vel.angular.z = 0.7
        else:
            # create lidar historical maps: rows 2n hold the min-map,
            # rows 2n+1 hold the avg/majority-vote map
            scan_avg = np.zeros((20, IMG_SIZE))
            semantic_avg = np.zeros((20, IMG_SIZE))
            for n in range(SEQ_LEN):
                # get the scan and intensity data for history step n:
                scan = self.scan_his[n*POINTS:(n+1)*POINTS]
                intensity = self.intensity_his[n*POINTS:(n+1)*POINTS]

                # get the angle of incidence of each ray:
                b = self.scan[:-1]
                c = self.scan[1:]
                theta = angle_incidence_calculation(b, c, self.alpha)
                # last ray handled separately:
                b_last = self.scan[-2]
                c_last = self.scan[-1]
                alpha_last = (270*np.pi / 180) / (POINTS - 1)
                theta_last = angle_incidence_calculation(b_last, c_last, alpha_last, last_ray=True)
                angle_incidence = np.concatenate((theta[0], theta_last), axis=0)

                # sanitize NaN / inf values:
                scan[np.isnan(scan)] = 0.
                scan[np.isinf(scan)] = 0.
                intensity[np.isnan(intensity)] = 0.
                intensity[np.isinf(intensity)] = 0.
                angle_incidence[np.isnan(angle_incidence)] = 0.
                angle_incidence[np.isinf(angle_incidence)] = 0.

                # keep the raw (unnormalized) central range data for the map:
                scan_tmp = scan[180:-180]

                # get semantic lidar data via S3-Net:
                with torch.no_grad():
                    # standardization (mu/std from the training set):
                    scan = (scan - self.s_mu) / self.s_std
                    intensity = (intensity - self.i_mu) / self.i_std
                    angle_incidence = (angle_incidence - self.a_mu) / self.a_std

                    # feed the batch to the network:
                    semantic_scans, _, _ = self.s3_net_model(
                        torch.FloatTensor(scan).to(self.device),
                        torch.FloatTensor(intensity).to(self.device),
                        torch.FloatTensor(angle_incidence).to(self.device))
                    semantic_scans_mx = semantic_scans.argmax(dim=1)
                    semantic_label = semantic_scans_mx.squeeze()

                # get valid range data:
                semantic_label_tmp = semantic_label[180:-180]

                # fill the scan/semantic maps, 9 rays per map cell:
                for i in range(IMG_SIZE):
                    tmp = scan_tmp[i*9:(i+1)*9]
                    semantic_tmp = semantic_label_tmp[i*9:(i+1)*9]
                    # min-map (row 2n):
                    i_min = np.argmin(tmp)
                    scan_avg[2*n, i] = tmp[i_min]
                    semantic_avg[2*n, i] = semantic_tmp[i_min].cpu().numpy()
                    # avg-map (row 2n+1); semantic uses a majority vote.
                    scan_avg[2*n+1, i] = np.mean(tmp)
                    # FIX: the majority vote was written to row 2*n, clobbering
                    # the min-map label and leaving row 2*n+1 all zeros.
                    semantic_avg[2*n+1, i] = semantic_tmp.mode(dim=0).values.cpu().numpy()

            # tile the 20x80 maps to the 80x80 network input
            # (np.tile replaces np.matlib.repmat: same values, no deprecated
            # numpy.matlib dependency)
            scan_avg = scan_avg.reshape(1600)
            scan_map = np.tile(scan_avg, 4).reshape(6400)

            semantic_avg = semantic_avg.reshape(1600)
            semantic_map = np.tile(semantic_avg, 4).reshape(6400)

            # standardization: scan map
            scan_map = (scan_map - self.s_mu) / self.s_std

            # standardization: sub goal
            sub_goal = np.array(self.goal, dtype=np.float32)
            sub_goal = (sub_goal - self.g_mu) / self.g_std

            # policy inference:
            action = self.cnn_model(torch.FloatTensor(scan_map).to(self.device),
                                    torch.FloatTensor(semantic_map).to(self.device),
                                    torch.FloatTensor(sub_goal).to(self.device))

            # extract (v, w) once instead of squeezing twice
            vel = action.squeeze().data.cpu().numpy()
            cmd_vel.linear.x = vel[0]
            cmd_vel.angular.z = vel[1]

        if not np.isnan(cmd_vel.linear.x) and not np.isnan(cmd_vel.angular.z):  # ensure data is valid
            self.cmd_vel_pub.publish(cmd_vel)
|
| 244 |
+
|
| 245 |
+
# end of function
|
| 246 |
+
|
| 247 |
+
|
| 248 |
+
# begin gracefully
|
| 249 |
+
#
|
| 250 |
+
|
| 251 |
+
if __name__ == '__main__':
    # start the inference node; all work happens inside the /cnn_data callback
    rospy.init_node('semantic_cnn_inference')
    drl_infe = SemanticCnnInference()
    # keep python from exiting until this node is stopped
    rospy.spin()
|
| 255 |
+
|
| 256 |
+
# end of file
|
training/model/semantic_cnn_model.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ed1ba15e9c9df1c7c0b5f1f45545f57ad30134d5bb7673e77ee7df106358a60
|
| 3 |
+
size 28985757
|
training/run_eval.sh
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
#
# file: run_eval.sh
#
# Driver script that evaluates a trained semantic CNN model on a data list.
#
# To run this script, execute the following line:
#
#  run_eval.sh eval.dat
#
# The first argument ($1) is the data list to evaluate. The script calls
# scripts/decode_demo.py with the stored model and logs the results to
# output/01_decode_train.log, printing only the "Average" loss line.
#

# decode the number of command line arguments
#
NARGS=$#

if (test "$NARGS" -eq "0") then
    echo "usage: run.sh train.dat [test.dat] [eval.dat]"
    exit 1
fi

# define a base directory for the experiment
#
DL_EXP=`pwd`;
DL_SCRIPTS="$DL_EXP/scripts";
DL_OUT="$DL_EXP/output";
DL_DECODE_ODIR="$DL_OUT";

# define the output directories for training/decoding/scoring
#
#DL_TRAIN_ODIR="$DL_OUT/00_train";
DL_TRAIN_ODIR="$DL_EXP/model";
DL_MDL_PATH="$DL_TRAIN_ODIR/semantic_cnn_model.pth";

# evaluate each data set that was specified
#
echo "... starting evaluation of $1 ..."
$DL_SCRIPTS/decode_demo.py $DL_DECODE_ODIR $DL_MDL_PATH $1 | \
    tee $DL_OUT/01_decode_train.log | grep "Average"
echo "... finished evaluation of $1 ..."


echo "======= end of results ======="

#
# exit gracefully
# exit gracefully
|
training/run_train.sh
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
#
# file: run_train.sh
#
# Driver script that trains the semantic CNN model.
#
# To run this script, execute the following line:
#
#  run_train.sh train.dat [dev.dat]
#
# The first argument ($1) is the training data; the second ($2) is the
# optional dev set passed through to scripts/train.py. Training output is
# logged to output/00_train.log and the model is written under model/.
#

# decode the number of command line arguments
#
NARGS=$#

if (test "$NARGS" -eq "0") then
    echo "usage: run.sh train.dat [test.dat] [eval.dat]"
    exit 1
fi

# define a base directory for the experiment
#
DL_EXP=`pwd`;
DL_SCRIPTS="$DL_EXP/scripts";
DL_OUT="$DL_EXP/output";
DL_LABELS="$DL_EXP/labels";

# define the number of feats environment variable
#
export DL_NUM_FEATS=5 #26

# define the output directories for training/decoding/scoring
#
#DL_TRAIN_ODIR="$DL_OUT/00_train";
DL_TRAIN_ODIR="$DL_EXP/model";
DL_MDL_PATH="$DL_TRAIN_ODIR/model.pth";

DL_DECODE_ODIR="$DL_OUT/01_hyp";
DL_HYP_TRAIN="$DL_DECODE_ODIR/train_set.hyp";
DL_HYP_DEV="$DL_DECODE_ODIR/dev_set.hyp";
DL_HYP_EVAL="$DL_DECODE_ODIR/eval_set.hyp";

# create the output directory
#
#rm -fr $DL_OUT
#mkdir -p $DL_OUT

# execute training: training must always be run
#
echo "... starting training on $1 ..."
$DL_SCRIPTS/train.py $DL_MDL_PATH $1 $2 | tee $DL_OUT/00_train.log | \
    grep "reading\|Step\|Average\|Warning\|Error"
echo "... finished training on $1 ..."

#
|
training/scripts/__pycache__/model.cpython-38.pyc
ADDED
|
Binary file (8.39 kB). View file
|
|
|
training/scripts/decode_demo.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
#
|
| 3 |
+
# file: $ISIP_EXP/tuh_dpath/exp_0074/scripts/decode.py
|
| 4 |
+
#
|
| 5 |
+
# revision history:
|
| 6 |
+
# 20190925 (TE): first version
|
| 7 |
+
#
|
| 8 |
+
# usage:
|
| 9 |
+
# python decode.py odir mfile data
|
| 10 |
+
#
|
| 11 |
+
# arguments:
|
| 12 |
+
# odir: the directory where the hypotheses will be stored
|
| 13 |
+
# mfile: input model file
|
| 14 |
+
# data: the input data list to be decoded
|
| 15 |
+
#
|
| 16 |
+
# This script decodes data using the trained semantic CNN model.
|
| 17 |
+
#------------------------------------------------------------------------------
|
| 18 |
+
|
| 19 |
+
# import pytorch modules
|
| 20 |
+
#
|
| 21 |
+
import torch
|
| 22 |
+
import torch.nn as nn
|
| 23 |
+
from tqdm import tqdm
|
| 24 |
+
|
| 25 |
+
# import the model and all of its variables/functions
|
| 26 |
+
#
|
| 27 |
+
from model import *
|
| 28 |
+
|
| 29 |
+
# visualize:
|
| 30 |
+
import numpy as np
|
| 31 |
+
# import modules
|
| 32 |
+
#
|
| 33 |
+
import sys
|
| 34 |
+
import os
|
| 35 |
+
from sklearn.metrics import explained_variance_score, mean_squared_error
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
#-----------------------------------------------------------------------------
|
| 39 |
+
#
|
| 40 |
+
# global variables are listed here
|
| 41 |
+
#
|
| 42 |
+
#-----------------------------------------------------------------------------
|
| 43 |
+
# reference path of a stored model (the actual model path comes from argv)
model_dir = './model/semantic_SemanticCNN_model.pth' # the path of model storage: 1400 is the best one
# expected command-line arguments: odir, mfile, data
NUM_ARGS = 3
# extension for hypothesis (model output) files
HYP_EXT = ".hyp"
# extension for ground-truth files
GRT_EXT = ".grt"

# general global values
#
SPACE = " "
NEW_LINE = "\n"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
#------------------------------------------------------------------------------
|
| 56 |
+
#
|
| 57 |
+
# the main program starts here
|
| 58 |
+
#
|
| 59 |
+
#------------------------------------------------------------------------------
|
| 60 |
+
|
| 61 |
+
### explained variance:
|
| 62 |
+
def explained_variance(input, target):
|
| 63 |
+
ev = 1 - np.var(target - input) / np.var(input)
|
| 64 |
+
return ev
|
| 65 |
+
|
| 66 |
+
# function: main
|
| 67 |
+
#
|
| 68 |
+
# arguments: none
|
| 69 |
+
#
|
| 70 |
+
# return: none
|
| 71 |
+
#
|
| 72 |
+
# This method is the main function.
|
| 73 |
+
#
|
| 74 |
+
|
| 75 |
+
def main(argv):
    """Decode a test set with the trained semantic CNN and write hypotheses.

    Arguments:
        argv: [odir, mdl_path, data_list] -- output directory, trained model
              file, and the list of data to be decoded.

    Writes <basename>.hyp (model outputs) and <basename>.grt (ground truth)
    into odir, one "v w" pair per line, and prints the average MSE loss.
    """
    # ensure we have the correct amount of arguments
    #
    if(len(argv) != NUM_ARGS):
        print("usage: python nedc_train_mdl.py [MDL_PATH] [TRAIN_PATH] [DEV_PATH]")
        exit(-1)

    # define local variables:
    odir = argv[0]
    mdl_path = argv[1]
    pTest = argv[2]

    # if the odir doesn't exist, we make it
    #
    if not os.path.exists(odir):
        os.makedirs(odir)

    # hypothesis / ground-truth file names derived from the input list name
    #
    hyp_name = os.path.splitext(os.path.basename(pTest))[0] + HYP_EXT
    grt_name = os.path.splitext(os.path.basename(pTest))[0] + GRT_EXT

    # set the device to use GPU if available
    #
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # build the evaluation dataset/loader (batch size 1, original order)
    #
    eval_dataset = NavDataset(pTest,'test')
    eval_dataloader = torch.utils.data.DataLoader(eval_dataset, batch_size=1, \
                                                  shuffle=False, drop_last=True) #, pin_memory=True)

    # instantiate a model:
    model = SemanticCNN(Bottleneck, [2, 1, 1])
    # moves the model to device (cpu in our case so no change):
    model.to(device)

    # set the model to evaluate
    #
    model.eval()

    # set the loss criterion:
    criterion = nn.MSELoss(reduction='sum')
    criterion.to(device)

    # load the weights
    #
    checkpoint = torch.load(mdl_path, map_location=device)
    model.load_state_dict(checkpoint['model'])

    # open the output files
    #
    try:
        ofile = open(os.path.join(odir, hyp_name), 'w+')
        vel_file = open(os.path.join(odir, grt_name), 'w+')
    except IOError as e:
        print(os.path.join(odir, hyp_name))
        print("[%s]: %s" % (hyp_name, e.strerror))
        exit(-1)


    # for each batch in increments of batch size:
    counter = 0
    running_loss = 0
    # number of batches (len/batch_size, truncated; loader drops the last
    # partial batch)
    num_batches = int(len(eval_dataset)/eval_dataloader.batch_size)
    with torch.no_grad():
        for i, batch in tqdm(enumerate(eval_dataloader), total=num_batches):
            counter += 1
            # collect the samples as a batch:
            scan_maps = batch['scan_map']
            scan_maps = scan_maps.to(device)
            semantic_maps = batch['semantic_map']
            semantic_maps = semantic_maps.to(device)
            sub_goals = batch['sub_goal']
            sub_goals = sub_goals.to(device)
            velocities = batch['velocity']
            velocities = velocities.to(device)

            # feed the network the batch
            #
            output = model(scan_maps, semantic_maps, sub_goals)
            # get the loss
            #
            loss = criterion(output, velocities)
            # multiple GPUs: average the per-GPU losses
            if torch.cuda.device_count() > 1:
                loss = loss.mean()

            running_loss += loss.item()
            # write the predicted and ground-truth (v, w) pairs, one per line
            #
            ofile.write(str(float(output.data.cpu().numpy()[0,0])) + \
                        SPACE + str(float(output.data.cpu().numpy()[0,1])) + NEW_LINE)
            vel_file.write(str(float(velocities[0,0])) + \
                           SPACE + str(float(velocities[0,1])) + NEW_LINE)

    # average loss over the whole set:
    val_loss = running_loss / counter
    print('Validation set: Average loss: {:.4f}'.format(val_loss))
    # close the files
    #
    ofile.close()
    vel_file.close()


    # exit gracefully
    #
    return True
|
| 192 |
+
#
|
| 193 |
+
# end of function
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
# begin gracefully
|
| 197 |
+
#
|
| 198 |
+
if __name__ == '__main__':
    # strip the program name and hand the remaining arguments to main
    main(sys.argv[1:])
|
| 200 |
+
#
|
| 201 |
+
# end of file
|
training/scripts/model.py
ADDED
|
@@ -0,0 +1,475 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
#
|
| 3 |
+
# file: $ISIP_EXP/SOGMP/scripts/model.py
|
| 4 |
+
#
|
| 5 |
+
# revision history: xzt
|
| 6 |
+
# 20220824 (TE): first version
|
| 7 |
+
#
|
| 8 |
+
# usage:
|
| 9 |
+
#
|
| 10 |
+
# This script hold the model architecture
|
| 11 |
+
#------------------------------------------------------------------------------
|
| 12 |
+
|
| 13 |
+
# import pytorch modules
|
| 14 |
+
#
|
| 15 |
+
import torch
|
| 16 |
+
import torch.nn as nn
|
| 17 |
+
import torch.nn.functional as F
|
| 18 |
+
import numpy as np
|
| 19 |
+
|
| 20 |
+
# import modules
|
| 21 |
+
#
|
| 22 |
+
import os
|
| 23 |
+
import random
|
| 24 |
+
|
| 25 |
+
# for reproducibility, we seed the rng
|
| 26 |
+
#
|
| 27 |
+
SEED1 = 1337          # fixed seed handed to set_seed() for reproducible runs
NEW_LINE = "\n"       # record separator used when parsing the dataset index files
|
| 29 |
+
|
| 30 |
+
#-----------------------------------------------------------------------------
|
| 31 |
+
#
|
| 32 |
+
# helper functions are listed here
|
| 33 |
+
#
|
| 34 |
+
#-----------------------------------------------------------------------------
|
| 35 |
+
|
| 36 |
+
# function: set_seed
|
| 37 |
+
#
|
| 38 |
+
# arguments: seed - the seed for all the rng
|
| 39 |
+
#
|
| 40 |
+
# returns: none
|
| 41 |
+
#
|
| 42 |
+
# this method seeds all the random number generators and makes
|
| 43 |
+
# the results deterministic
|
| 44 |
+
#
|
| 45 |
+
def set_seed(seed):
    """Seed every random number generator used by the pipeline.

    Seeds PyTorch (CPU and all CUDA devices), Python's ``random`` module,
    NumPy, and the interpreter hash seed, and forces cuDNN into its
    deterministic, non-benchmarking mode so repeated runs are reproducible.

    Args:
        seed: integer seed applied to all RNGs.
    """
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # deterministic cuDNN kernels; benchmark mode would pick algorithms
    # non-deterministically based on runtime timings
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    random.seed(seed)
    # fix: NumPy's RNG was previously left unseeded even though numpy is
    # used throughout the data pipeline
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
|
| 52 |
+
#
|
| 53 |
+
# end of method
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# function: get_data
|
| 57 |
+
#
|
| 58 |
+
# arguments: fp - file pointer
|
| 59 |
+
# num_feats - the number of features in a sample
|
| 60 |
+
#
|
| 61 |
+
# returns: data - the signals/features
|
| 62 |
+
# labels - the correct labels for them
|
| 63 |
+
#
|
| 64 |
+
# this method takes in a fp and returns the data and labels
|
| 65 |
+
POINTS = 1081   # raw LiDAR beams per scan (presumably a 270-degree scanner -- confirm sensor spec)
IMG_SIZE = 80   # side length of the square map the network consumes (80x80)
SEQ_LEN = 10    # number of consecutive frames stacked into one sample
|
| 68 |
+
class NavDataset(torch.utils.data.Dataset):
    """Navigation dataset of per-frame ``.npy`` files: LiDAR scans, semantic
    labels, local sub-goals, and velocity commands.

    Layout assumed from the code (TODO confirm against the data generator):
    ``img_path/dataset.txt`` lists one recording folder per line (folders are
    recognised by containing a ``'-'``); each folder holds a
    ``<file_name>.txt`` (e.g. ``train.txt`` / ``dev.txt``) listing that
    split's frame files, plus sub-directories ``scans_lidar/``,
    ``semantic_label/``, ``sub_goals_local/`` and ``velocities/`` with
    7-digit-numbered frames such as ``0000123.npy``.
    """

    def __init__(self, img_path, file_name):
        # initialize the data and labels
        self.npy_names = []   # one list of frame paths per recording folder
        self.lengths = []     # frame count per recording folder
        # parameters: data mean std: scan, sub_goal, intensity, angle of incidence:
        # [[4.518406, 8.2914915], [0.30655652, 0.5378557], [3081.8167, 1529.4413], [0.5959513, 0.4783924]]
        self.s_mu = 4.518406      # scan mean
        self.s_std = 8.2914915    # scan std
        self.g_mu = 0.30655652    # sub-goal mean
        self.g_std = 0.5378557    # sub-goal std
        self.i_mu = 3081.8167     # intensity mean (not used below)
        self.i_std = 1529.4413    # intensity std (not used below)
        self.a_mu = 0.5959513     # angle-of-incidence mean (not used below)
        self.a_std = 0.4783924    # angle-of-incidence std (not used below)

        # open the top-level folder index; the split files (train.txt/dev.txt)
        # live inside the folders it lists:
        fp_folder = open(img_path+'dataset.txt','r')

        # for each line of the file:
        for folder_line in fp_folder.read().split(NEW_LINE):
            # recording folders are identified by a '-' in their name
            if('-' in folder_line):
                npy_name = []
                folder_path = folder_line
                fp_file = open(img_path+folder_path+'/'+file_name+'.txt', 'r')
                for line in fp_file.read().split(NEW_LINE):
                    if('.npy' in line):
                        # NOTE(review): `line` is concatenated directly after
                        # the folder path, so it must already carry any needed
                        # separator -- confirm the index-file format
                        npy_name.append(img_path+folder_path+line)

                self.lengths.append(len(npy_name))
                self.npy_names.append(npy_name)
                # close txt file:
                fp_file.close()

        # close txt file:
        fp_folder.close()

        # total sample count plus cumulative folder lengths, used by
        # __getitem__ to map a flat index back to (folder, offset)
        self.length = np.sum(self.lengths)
        self.cumsum_lengths = np.cumsum(self.lengths).tolist()

        print("dataset length: ", self.length)


    def __len__(self):
        # total frame count over every recording folder
        return self.length

    def __getitem__(self, idx):
        """Return one sample dict with keys 'scan_map', 'semantic_map',
        'sub_goal' and 'velocity' (all FloatTensors).

        scan_map / semantic_map are flat vectors of SEQ_LEN*2*IMG_SIZE*4 =
        6400 values; the network later reshapes them to 80x80.
        """

        # ---------- FAST FOLDER LOCATE ----------
        # binary-search the cumulative lengths to find which recording folder
        # the flat index falls into, and its offset within that folder
        folder_id = np.searchsorted(self.cumsum_lengths, idx, side='right')
        start = 0 if folder_id == 0 else self.cumsum_lengths[folder_id - 1]
        data_len = self.lengths[folder_id]
        npy_list = self.npy_names[folder_id]

        # ---------- FAST FILE PARSE ----------
        # split "<root>0000123.npy" into the folder root and the 7-digit
        # frame number (the last 11 chars are "NNNNNNN.npy")
        npy_path_name = npy_list[idx - start]
        npy_path = npy_path_name[:-11]
        idx_num = int(npy_path_name[-11:-4])

        # pick a window start so that SEQ_LEN consecutive frames exist;
        # fall back to the middle of the recording for very short folders
        if idx_num + SEQ_LEN < data_len:
            idx_s = idx_num
        elif idx_num - SEQ_LEN > 0:
            idx_s = idx_num - SEQ_LEN
        else:
            idx_s = data_len // 2

        # Build ending frame filename once
        end_str = f"{idx_s + SEQ_LEN - 1:07d}.npy"

        # ---------- LOAD SUBGOAL / VELOCITY ----------
        # targets are taken at the *last* frame of the window
        sub_goal = np.load(f"{npy_path}/sub_goals_local/{end_str}")
        velocity = np.load(f"{npy_path}/velocities/{end_str}")

        # ---------- CREATE LIDAR MAP (VECTORIZED) ----------
        # scan_avg, semantic_avg shape = (SEQ_LEN*2, IMG_SIZE)
        scan_avg = np.zeros((SEQ_LEN * 2, IMG_SIZE), dtype=np.float32)
        semantic_avg = np.zeros((SEQ_LEN * 2, IMG_SIZE), dtype=np.float32)

        # Precompute slicing: IMG_SIZE non-overlapping bins of 9 beams each
        # (consumes the first 720 of the cropped beams)
        slice_idx = np.arange(0, IMG_SIZE * 9, 9).reshape(-1, 1) + np.arange(9)

        for n in range(SEQ_LEN):
            frame_idx = f"{idx_s + n:07d}.npy"

            # drop 180 beams at each end of the scan (assumes scans hold
            # POINTS = 1081 beams -- TODO confirm)
            scan = np.load(f"{npy_path}/scans_lidar/{frame_idx}")[180:-180]
            semantic = np.load(f"{npy_path}/semantic_label/{frame_idx}")[180:-180]

            # Shape after slicing = (IMG_SIZE, 9)
            bins_scan = scan[slice_idx]
            bins_sem = semantic[slice_idx]

            # ---- min map: closest return per bin, with its semantic label ----
            mins = bins_scan.min(axis=1)
            min_idx = bins_scan.argmin(axis=1)
            sem_min = bins_sem[np.arange(IMG_SIZE), min_idx]

            scan_avg[2 * n] = mins
            semantic_avg[2 * n] = sem_min

            # ---- avg map: mean range per bin ----
            scan_avg[2 * n + 1] = bins_scan.mean(axis=1)

            # ---- majority vote (FAST) ----
            # per-bin class histogram (bins are only 9 wide, so a 256-class
            # bincount per row is cheap), then the modal label
            counts = np.apply_along_axis(np.bincount, 1, bins_sem.astype(int), minlength=256)
            semantic_avg[2 * n + 1] = counts.argmax(axis=1)

        # ---------- FINAL MAP EXPANSION ----------
        # flatten to SEQ_LEN*2*IMG_SIZE = 1600 values and repeat each 4x
        # -> 6400 values, later reshaped to 80x80 by the network
        scan_map = np.repeat(scan_avg.reshape(-1), 4)
        semantic_map = np.repeat(semantic_avg.reshape(-1), 4)

        # initialize: scrub NaN/Inf from the regression targets
        sub_goal[np.isnan(sub_goal)] = 0.
        sub_goal[np.isinf(sub_goal)] = 0.

        velocity[np.isnan(velocity)] = 0.
        velocity[np.isinf(velocity)] = 0.

        # data normalization:
        # standardization: scan (mu: 4.518406, std: 8.2914915)
        scan_map = (scan_map - self.s_mu) / self.s_std

        # standardization: sub goal (mu: 0.30655652, std: 0.5378557)
        sub_goal = (sub_goal - self.g_mu) / self.g_std

        # transfer to pytorch tensor:
        scan_tensor = torch.FloatTensor(scan_map)
        semantic_tensor = torch.FloatTensor(semantic_map)
        sub_goal_tensor = torch.FloatTensor(sub_goal)
        velocity_tensor = torch.FloatTensor(velocity)

        data = {
            'scan_map': scan_tensor,
            'semantic_map': semantic_tensor,
            'sub_goal': sub_goal_tensor,
            'velocity': velocity_tensor,
        }

        return data
|
| 210 |
+
|
| 211 |
+
#
|
| 212 |
+
# end of function
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
#------------------------------------------------------------------------------
|
| 216 |
+
#
|
| 217 |
+
# ResNet blocks
|
| 218 |
+
#
|
| 219 |
+
#------------------------------------------------------------------------------
|
| 220 |
+
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Build a bias-free 3x3 convolution.

    Padding is tied to the dilation so the spatial size is preserved
    whenever ``stride == 1``.
    """
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
|
| 224 |
+
|
| 225 |
+
def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 (pointwise) convolution."""
    return nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
|
| 228 |
+
|
| 229 |
+
class Bottleneck(nn.Module):
    """ResNet-v1.5 bottleneck block: 1x1 reduce -> 3x3 (carries the stride)
    -> 1x1 expand, with an identity or projection shortcut.

    As in torchvision's variant (and per the note in the original ResNet
    paper https://arxiv.org/abs/1512.03385 vs.
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch),
    the downsampling stride sits on the 3x3 convolution rather than the
    first 1x1. Note the non-standard expansion factor of 2 (torchvision
    uses 4), so output channels = planes * 2.
    """

    expansion = 2  # output channels = planes * expansion (torchvision default is 4)

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        super(Bottleneck, self).__init__()
        bn = nn.BatchNorm2d if norm_layer is None else norm_layer
        width = int(planes * (base_width / 64.)) * groups

        # 1x1 channel reduction
        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, stride=1,
                               bias=False)
        self.bn1 = bn(width)
        # 3x3 spatial convolution; any downsampling stride lives here (v1.5)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=dilation, dilation=dilation,
                               groups=groups, bias=False)
        self.bn2 = bn(width)
        # 1x1 channel expansion back to planes * expansion
        self.conv3 = nn.Conv2d(width, planes * self.expansion, kernel_size=1,
                               stride=1, bias=False)
        self.bn3 = bn(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # project the shortcut when channel count / stride changed
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += shortcut
        return self.relu(out)
|
| 276 |
+
#
|
| 277 |
+
# end of ResNet blocks
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
#------------------------------------------------------------------------------
|
| 281 |
+
#
|
| 282 |
+
# the model is defined here
|
| 283 |
+
#
|
| 284 |
+
#------------------------------------------------------------------------------
|
| 285 |
+
|
| 286 |
+
# define the PyTorch MLP model
|
| 287 |
+
#
|
| 288 |
+
class SemanticCNN(nn.Module):
    """Truncated-ResNet policy network for semantic navigation.

    The flattened 80x80 LiDAR scan map and 80x80 semantic map are stacked
    as a 2-channel image and run through three residual stages plus two
    long-range projection shortcuts (downsample2 / downsample3); the
    globally pooled feature vector is concatenated with the 2-D sub-goal
    and mapped linearly to ``num_classes`` outputs (default 2 --
    presumably a velocity command; confirm with the training script).
    """

    def __init__(self, block, layers, num_classes=2, zero_init_residual=True,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        """Build the network.

        Args:
            block: residual block class (e.g. Bottleneck); its ``expansion``
                attribute sizes the stages and the final linear layer.
            layers: per-stage block counts for layer1..layer3.
            num_classes: size of the output vector.
            zero_init_residual: zero-init each block's last BN so residual
                branches start as identities (per arXiv:1706.02677).
            groups / width_per_group / replace_stride_with_dilation /
            norm_layer: standard torchvision-style ResNet knobs.
        """
        # inherit the superclass properties/methods
        #
        super(SemanticCNN, self).__init__()
        # define the model
        #
        ################## ped_pos net model: ###################
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64   # running channel count, updated by _make_layer
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        # stem: 2 input channels (scan map + semantic map), stride 1 so the
        # 80x80 resolution is preserved into layer1
        self.conv1 = nn.Conv2d(2, self.inplanes, kernel_size=3, stride=1, padding=1,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])

        # extra bottleneck on top of layer2's output (256 ch in/out with the
        # default Bottleneck.expansion == 2)
        self.conv2_2 = nn.Sequential(
            nn.Conv2d(in_channels=256, out_channels=128, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3, 3), stride=(1,1), padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(256)
        )
        # projection shortcut from layer1's output (128 ch) down to match
        # conv2_2's output (256 ch, half resolution)
        self.downsample2 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(1, 1), stride=(2,2), padding=(0, 0)),
            nn.BatchNorm2d(256)
        )
        self.relu2 = nn.ReLU(inplace=True)

        # extra bottleneck on top of layer3's output (512 ch in/out)
        self.conv3_2 = nn.Sequential(
            nn.Conv2d(in_channels=512, out_channels=256, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3, 3), stride=(1,1), padding=(1, 1)),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 1), stride=(1,1), padding=(0, 0)),
            nn.BatchNorm2d(512)
        )
        # projection shortcut from the post-stem feature map (64 ch, full
        # resolution) down to match conv3_2's output (512 ch, 1/4 resolution)
        self.downsample3 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=512, kernel_size=(1, 1), stride=(4,4), padding=(0, 0)),
            nn.BatchNorm2d(512)
        )
        self.relu3 = nn.ReLU(inplace=True)

        # self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
        #                                dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # pooled features (256 * expansion = 512) + 2-D goal -> outputs
        self.fc = nn.Linear(256 * block.expansion + 2, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d): # add by xzt
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Stack `blocks` residual blocks; the first one may downsample and
        gets a projection shortcut when channels or stride change."""
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # trade the stride for dilation (keeps resolution)
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, scan, semantics, goal):
        """Forward pass. `scan` and `semantics` are flat 6400-element maps
        (reshaped to 1x80x80 each); `goal` is reshaped to (B, 2)."""
        ###### Start of fusion net ######
        scan_in = scan.reshape(-1,1,80,80)
        semantics_in = semantics.reshape(-1,1,80,80)
        # stack scan + semantics as a 2-channel image
        fusion_in = torch.cat((scan_in, semantics_in), dim=1)

        # See note [TorchScript super()]
        x = self.conv1(fusion_in)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        # long shortcut from the stem output, consumed after conv3_2
        identity3 = self.downsample3(x)

        x = self.layer1(x)

        # shortcut from layer1's output, consumed after conv2_2
        identity2 = self.downsample2(x)

        x = self.layer2(x)

        x = self.conv2_2(x)
        x += identity2
        x = self.relu2(x)


        x = self.layer3(x)
        # x = self.layer4(x)

        x = self.conv3_2(x)
        x += identity3
        x = self.relu3(x)

        # global average pool -> (B, 512) feature vector
        x = self.avgpool(x)
        fusion_out = torch.flatten(x, 1)
        ###### End of fusion net ######

        ###### Start of goal net #######
        goal_in = goal.reshape(-1,2)
        goal_out = torch.flatten(goal_in, 1)
        ###### End of goal net #######
        # Combine pooled features with the goal and regress the outputs
        fc_in = torch.cat((fusion_out, goal_out), dim=1)
        x = self.fc(fc_in)

        return x

    def forward(self, scan, semantics, goal):
        return self._forward_impl(scan, semantics, goal)
|
| 469 |
+
#
|
| 470 |
+
# end of method
|
| 471 |
+
#
|
| 472 |
+
# end of class
|
| 473 |
+
|
| 474 |
+
#
|
| 475 |
+
# end of file
|
training/scripts/train.py
ADDED
|
@@ -0,0 +1,385 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python
|
| 2 |
+
#
|
| 3 |
+
# file: $ISIP_EXP/SOGMP/scripts/train.py
|
| 4 |
+
#
|
| 5 |
+
# revision history: xzt
|
| 6 |
+
# 20220824 (TE): first version
|
| 7 |
+
#
|
| 8 |
+
# usage:
|
| 9 |
+
# python train.py mdir train_data val_data
|
| 10 |
+
#
|
| 11 |
+
# arguments:
|
| 12 |
+
# mdir: the directory where the output model is stored
|
| 13 |
+
# train_data: the directory of training data
|
| 14 |
+
# val_data: the directory of valiation data
|
| 15 |
+
#
|
| 16 |
+
# This script trains a Semantic CNN model
|
| 17 |
+
#------------------------------------------------------------------------------
|
| 18 |
+
|
| 19 |
+
# import pytorch modules
|
| 20 |
+
#
|
| 21 |
+
import torch
|
| 22 |
+
import torch.nn as nn
|
| 23 |
+
from torch.optim import Adam
|
| 24 |
+
from tqdm import tqdm
|
| 25 |
+
|
| 26 |
+
# visualize:
|
| 27 |
+
from tensorboardX import SummaryWriter
|
| 28 |
+
import numpy as np
|
| 29 |
+
|
| 30 |
+
# import the model and all of its variables/functions
|
| 31 |
+
#
|
| 32 |
+
from model import *
|
| 33 |
+
|
| 34 |
+
# import modules
|
| 35 |
+
#
|
| 36 |
+
import sys
|
| 37 |
+
import os
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
#-----------------------------------------------------------------------------
|
| 41 |
+
#
|
| 42 |
+
# global variables are listed here
|
| 43 |
+
#
|
| 44 |
+
#-----------------------------------------------------------------------------
|
| 45 |
+
|
| 46 |
+
# general global values
|
| 47 |
+
#
|
| 48 |
+
model_dir = './model/semantic_cnn_model.pth' # the path of model storage
NUM_ARGS = 3         # expected CLI arguments: model path, train dir, dev dir
NUM_EPOCHS = 4000    # total number of training epochs
BATCH_SIZE = 64      # mini-batch size for both the train and dev loaders
# keyword names used to build the Adam optimizer's kwargs dict
LEARNING_RATE = "lr"
BETAS = "betas"
EPS = "eps"
WEIGHT_DECAY = "weight_decay"

# for reproducibility, we seed the rng
#
set_seed(SEED1)
|
| 60 |
+
|
| 61 |
+
# adjust_learning_rate
|
| 62 |
+
#
|
| 63 |
+
def adjust_learning_rate(optimizer, epoch):
    """Apply the hand-tuned step-decay schedule to every param group.

    The rate steps down at fixed epoch thresholds; past epoch 48000 an
    additional decade of decay is applied for every 110000 epochs.
    """
    # (threshold, rate) pairs; later entries override earlier ones
    steps = (
        (40, 2e-4),
        (2000, 2e-5),
        (21000, 1e-5),
        (32984, 1e-6),
    )

    lr = 1e-3
    for threshold, rate in steps:
        if epoch > threshold:
            lr = rate

    if epoch > 48000:
        # extra order-of-magnitude decay per 110000 epochs
        lr = lr * (0.1 ** (epoch // 110000))

    for group in optimizer.param_groups:
        group['lr'] = lr
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
# train function:
|
| 84 |
+
def train(model, dataloader, dataset, device, optimizer, criterion, epoch, epochs):
    """Run one training epoch and return the summed loss averaged per sample.

    Args:
        model: network taking (scan_maps, semantic_maps, sub_goals).
        dataloader: yields batch dicts with keys 'scan_map', 'semantic_map',
            'sub_goal', 'velocity'.
        dataset: the underlying dataset (used only for loss normalization
            and the batch-count estimate).
        device: device the batches are moved to.
        optimizer: optimizer stepped once per batch.
        criterion: loss with reduction='sum' (see main()); the return value
            is therefore running summed loss divided by len(dataset).
        epoch, epochs: current / total epoch numbers, for logging only.

    Returns:
        float: average loss per sample over the epoch.
    """
    # Set model to training mode
    model.train()
    running_loss = 0
    # number of batches yielded per epoch; the loaders are built with
    # drop_last=True, so this floor division is the exact count
    # (the original comment claimed "ceiling" -- int(a/b) floors)
    num_batches = int(len(dataset)/dataloader.batch_size)
    for i, batch in tqdm(enumerate(dataloader), total=num_batches):
        # collect the samples as a batch and move them to the device:
        scan_maps = batch['scan_map'].to(device)
        semantic_maps = batch['semantic_map'].to(device)
        sub_goals = batch['sub_goal'].to(device)
        velocities = batch['velocity'].to(device)

        # set all gradients to 0:
        optimizer.zero_grad()
        # feed the network the batch
        output = model(scan_maps, semantic_maps, sub_goals)

        # ---------------------------
        # Mask zero-velocity samples
        # ---------------------------
        mask = (velocities != 0).any(dim=1)  # (B,)

        if mask.sum() == 0:
            # keep the graph alive with a zero loss so backward() is safe
            loss = output.sum() * 0
        else:
            loss = criterion(output[mask], velocities[mask])

        # multiple GPUs: reduce any per-replica losses to a scalar BEFORE
        # backpropagation (previously .mean() ran after backward, and
        # backward was called with an explicit ones_like gradient)
        if torch.cuda.device_count() > 1:
            loss = loss.mean()

        # perform back propagation on the scalar loss:
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        # display informational message
        #
        if(i % 1280 == 0):
            print('Epoch [{}/{}], Step[{}/{}], Loss: {:.4f}'
                  .format(epoch, epochs, i + 1, num_batches, loss.item()))

    # per-sample average (criterion sums over elements)
    train_loss = running_loss / len(dataset)

    return train_loss
|
| 147 |
+
|
| 148 |
+
# validate function:
|
| 149 |
+
def validate(model, dataloader, dataset, device, criterion):
    """Evaluate the model over the dev set and return the per-sample loss.

    Mirrors train() but performs no optimization; runs under
    torch.no_grad() so no autograd graphs are built (previously gradients
    were tracked throughout validation, wasting memory).

    Args:
        model: network taking (scan_maps, semantic_maps, sub_goals).
        dataloader: yields batch dicts with keys 'scan_map', 'semantic_map',
            'sub_goal', 'velocity'.
        dataset: the underlying dataset (used for loss normalization and
            the batch-count estimate).
        device: device the batches are moved to.
        criterion: loss with reduction='sum' (see main()).

    Returns:
        float: average loss per sample over the dev set.
    """
    # set model to evaluation mode:
    model.eval()
    running_loss = 0
    # number of batches yielded; with drop_last=True this floor is exact
    num_batches = int(len(dataset)/dataloader.batch_size)
    # fix: disable gradient tracking for the whole evaluation pass
    with torch.no_grad():
        for i, batch in tqdm(enumerate(dataloader), total=num_batches):
            # collect the samples as a batch and move them to the device:
            scan_maps = batch['scan_map'].to(device)
            semantic_maps = batch['semantic_map'].to(device)
            sub_goals = batch['sub_goal'].to(device)
            velocities = batch['velocity'].to(device)

            # feed the network the batch
            output = model(scan_maps, semantic_maps, sub_goals)

            # ---------------------------
            # Mask zero-velocity samples
            # ---------------------------
            mask = (velocities != 0).any(dim=1)  # (B,)

            if mask.sum() == 0:
                loss = output.sum() * 0
            else:
                loss = criterion(output[mask], velocities[mask])

            # multiple GPUs: reduce any per-replica losses to a scalar
            if torch.cuda.device_count() > 1:
                loss = loss.mean()

            running_loss += loss.item()

    # per-sample average (criterion sums over elements)
    val_loss = running_loss / len(dataset)

    return val_loss
|
| 201 |
+
|
| 202 |
+
#------------------------------------------------------------------------------
|
| 203 |
+
#
|
| 204 |
+
# the main program starts here
|
| 205 |
+
#
|
| 206 |
+
#------------------------------------------------------------------------------
|
| 207 |
+
|
| 208 |
+
# function: main
|
| 209 |
+
#
|
| 210 |
+
# arguments: none
|
| 211 |
+
#
|
| 212 |
+
# return: none
|
| 213 |
+
#
|
| 214 |
+
# This method is the main function.
|
| 215 |
+
#
|
| 216 |
+
def main(argv):
|
| 217 |
+
|
| 218 |
+
# ensure we have the correct amount of arguments
|
| 219 |
+
#
|
| 220 |
+
#global cur_batch_win
|
| 221 |
+
if(len(argv) != NUM_ARGS):
|
| 222 |
+
print("usage: python nedc_train_mdl.py [MDL_PATH] [TRAIN_PATH] [DEV_PATH]")
|
| 223 |
+
exit(-1)
|
| 224 |
+
|
| 225 |
+
# define local variables
|
| 226 |
+
#
|
| 227 |
+
mdl_path = argv[0]
|
| 228 |
+
pTrain = argv[1]
|
| 229 |
+
pDev = argv[2]
|
| 230 |
+
|
| 231 |
+
# get the output directory name
|
| 232 |
+
#
|
| 233 |
+
odir = os.path.dirname(mdl_path)
|
| 234 |
+
|
| 235 |
+
# if the odir doesn't exits, we make it
|
| 236 |
+
#
|
| 237 |
+
if not os.path.exists(odir):
|
| 238 |
+
os.makedirs(odir)
|
| 239 |
+
|
| 240 |
+
# set the device to use GPU if available
|
| 241 |
+
#
|
| 242 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 243 |
+
|
| 244 |
+
### train:
|
| 245 |
+
print('...Start reading data...')
|
| 246 |
+
# get array of the data
|
| 247 |
+
# data: [[0, 1, ... 26], [27, 28, ...] ...]
|
| 248 |
+
# labels: [0, 0, 1, ...]
|
| 249 |
+
#
|
| 250 |
+
#[ped_pos_t, scan_t, goal_t, vel_t] = get_data(pTrain)
|
| 251 |
+
train_dataset = NavDataset(pTrain, 'train')
|
| 252 |
+
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, \
|
| 253 |
+
shuffle=True, drop_last=True, pin_memory=True)
|
| 254 |
+
#train_data = train_data - np.mean(train_data, axis=0)
|
| 255 |
+
|
| 256 |
+
### dev:
|
| 257 |
+
|
| 258 |
+
# get array of the data
|
| 259 |
+
# data: [[0, 1, ... 26], [27, 28, ...] ...]
|
| 260 |
+
# labels: [0, 0, 1, ...]
|
| 261 |
+
#
|
| 262 |
+
#[ped_pos_d, scan_d, goal_d, vel_d] = get_data(pDev)
|
| 263 |
+
dev_dataset = NavDataset(pDev, 'dev')
|
| 264 |
+
dev_dataloader = torch.utils.data.DataLoader(dev_dataset, batch_size=BATCH_SIZE, \
|
| 265 |
+
shuffle=True, drop_last=True, pin_memory=True)
|
| 266 |
+
#dev_data = dev_data - np.mean(dev_data, axis=0)
|
| 267 |
+
print('...Finish reading data...')
|
| 268 |
+
|
| 269 |
+
# instantiate a model
|
| 270 |
+
#
|
| 271 |
+
model = SemanticCNN(Bottleneck, [2, 1, 1])
|
| 272 |
+
|
| 273 |
+
# moves the model to device (cpu in our case so no change)
|
| 274 |
+
#
|
| 275 |
+
model.to(device)
|
| 276 |
+
|
| 277 |
+
# set the adam optimizer parameters
|
| 278 |
+
#
|
| 279 |
+
opt_params = { LEARNING_RATE: 0.001,
|
| 280 |
+
BETAS: (.9,0.999),
|
| 281 |
+
EPS: 1e-08,
|
| 282 |
+
WEIGHT_DECAY: .001 }
|
| 283 |
+
|
| 284 |
+
# set the loss and optimizer
|
| 285 |
+
#
|
| 286 |
+
criterion = nn.MSELoss(reduction='sum')
|
| 287 |
+
criterion.to(device)
|
| 288 |
+
|
| 289 |
+
# create an optimizer, and pass the model params to it
|
| 290 |
+
#
|
| 291 |
+
optimizer = Adam(model.parameters(), **opt_params)
|
| 292 |
+
|
| 293 |
+
# get the number of epochs to train on
|
| 294 |
+
#
|
| 295 |
+
epochs = NUM_EPOCHS
|
| 296 |
+
|
| 297 |
+
# if there are trained models, continue training:
|
| 298 |
+
if os.path.exists(mdl_path):
|
| 299 |
+
checkpoint = torch.load(mdl_path)
|
| 300 |
+
model.load_state_dict(checkpoint['model'])
|
| 301 |
+
optimizer.load_state_dict(checkpoint['optimizer'])
|
| 302 |
+
start_epoch = checkpoint['epoch']
|
| 303 |
+
print('Load epoch {} success'.format(start_epoch))
|
| 304 |
+
else:
|
| 305 |
+
start_epoch = 0
|
| 306 |
+
print('No trained models, restart training')
|
| 307 |
+
|
| 308 |
+
# multiple GPUs:
|
| 309 |
+
if torch.cuda.device_count() > 1:
|
| 310 |
+
print("Let's use 2 of total", torch.cuda.device_count(), "GPUs!")
|
| 311 |
+
# dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
|
| 312 |
+
model = nn.DataParallel(model) #, device_ids=[0, 1])
|
| 313 |
+
|
| 314 |
+
# moves the model to device (cpu in our case so no change)
|
| 315 |
+
#
|
| 316 |
+
model.to(device)
|
| 317 |
+
|
| 318 |
+
# tensorboard writer:
|
| 319 |
+
writer = SummaryWriter('runs')
|
| 320 |
+
|
| 321 |
+
# for each epoch
|
| 322 |
+
#
|
| 323 |
+
#loss_train = []
|
| 324 |
+
#loss_vector = []
|
| 325 |
+
epoch_num = 0
|
| 326 |
+
for epoch in range(start_epoch+1, epochs):
|
| 327 |
+
|
| 328 |
+
# adjust learning rate:
|
| 329 |
+
adjust_learning_rate(optimizer, epoch)
|
| 330 |
+
################################## Train #####################################
|
| 331 |
+
# for each batch in increments of batch size
|
| 332 |
+
#
|
| 333 |
+
train_epoch_loss = train(
|
| 334 |
+
model, train_dataloader, train_dataset, device, optimizer, criterion, epoch, epochs
|
| 335 |
+
)
|
| 336 |
+
|
| 337 |
+
################################## Test #####################################
|
| 338 |
+
valid_epoch_loss = validate(
|
| 339 |
+
model, dev_dataloader, dev_dataset, device, criterion
|
| 340 |
+
)
|
| 341 |
+
|
| 342 |
+
# log the epoch loss
|
| 343 |
+
writer.add_scalar('training loss',
|
| 344 |
+
train_epoch_loss,
|
| 345 |
+
epoch)
|
| 346 |
+
writer.add_scalar('validation loss',
|
| 347 |
+
valid_epoch_loss,
|
| 348 |
+
epoch)
|
| 349 |
+
|
| 350 |
+
print('Train set: Average loss: {:.4f}'.format(train_epoch_loss))
|
| 351 |
+
print('Validation set: Average loss: {:.4f}'.format(valid_epoch_loss))
|
| 352 |
+
|
| 353 |
+
# save the model
|
| 354 |
+
#
|
| 355 |
+
if(epoch % 50 == 0):
|
| 356 |
+
if torch.cuda.device_count() > 1: # multiple GPUS:
|
| 357 |
+
state = {'model':model.module.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch':epoch}
|
| 358 |
+
else:
|
| 359 |
+
state = {'model':model.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch':epoch}
|
| 360 |
+
path='./model/model' + str(epoch) +'.pth'
|
| 361 |
+
torch.save(state, path)
|
| 362 |
+
|
| 363 |
+
epoch_num = epoch
|
| 364 |
+
|
| 365 |
+
# save the final model
|
| 366 |
+
if torch.cuda.device_count() > 1: # multiple GPUS:
|
| 367 |
+
state = {'model':model.module.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch':epoch_num}
|
| 368 |
+
else:
|
| 369 |
+
state = {'model':model.state_dict(), 'optimizer':optimizer.state_dict(), 'epoch':epoch_num}
|
| 370 |
+
torch.save(state, mdl_path)
|
| 371 |
+
|
| 372 |
+
# exit gracefully
|
| 373 |
+
#
|
| 374 |
+
|
| 375 |
+
return True
|
| 376 |
+
#
|
| 377 |
+
# end of function
|
| 378 |
+
|
| 379 |
+
|
| 380 |
+
# Script entry point: forward the command-line arguments (without the
# program name) to main() when run directly rather than imported.
if __name__ == '__main__':
    main(sys.argv[1:])
# end of file
|