Deep learning 

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_classified_point_cloud_path

– training_activations_path

Hierarchical feature extraction with KPConv

The ConvAutoencPwiseClassif architecture can be configured with Kernel Point Convolution (KPConv) for feature extraction operations. The downsampling strategy can be defined through the FeaturesDownsamplingLayer or the StridedKPConvLayer, the upsampling strategy through the FeaturesUpsamplingLayer, and the feature extraction through the KPConvLayer. The JSON below illustrates how to configure a KPConv-based hierarchical feature extractor using the VL3D framework. For further details on the original KPConv architecture, readers are referred to the KPConv paper (Thomas et al., 2019).

{
  "in_pcloud": [
    "/mnt/netapp2/Store_uscciaep/lidar_data/hessigheim/vl3d/mined/Mar18_train_hsv_std.laz"
  ],
  "out_pcloud": [
    "/mnt/netapp2/Store_uscciaep/lidar_data/hessigheim/vl3d/kpconv_R/T1/*"
  ],
  "sequential_pipeline": [
    {
        "train": "ConvolutionalAutoencoderPwiseClassifier",
        "training_type": "base",
        "fnames": ["Reflectance", "ones"],
        "random_seed": null,
        "model_args": {
            "fnames": ["Reflectance", "ones"],
            "num_classes": 11,
            "class_names": ["LowVeg", "ImpSurf", "Vehicle", "UrbanFurni", "Roof", "Facade", "Shrub", "Tree", "Soil/Gravel", "VertSurf", "Chimney"],
            "pre_processing": {
                "pre_processor": "hierarchical_fps",
                "support_strategy_num_points": 60000,
                "to_unit_sphere": false,
                "support_strategy": "fps",
                "support_chunk_size": 2000,
                "support_strategy_fast": true,
                "center_on_pcloud": true,
                "neighborhood": {
                    "type": "sphere",
                    "radius": 3.0,
                    "separation_factor": 0.8
                },
                "num_points_per_depth": [512, 256, 128, 64, 32],
                "fast_flag_per_depth": [false, false, false, false, false],
                "num_downsampling_neighbors": [1, 16, 8, 8, 4],
                "num_pwise_neighbors": [32, 16, 16, 8, 4],
                "num_upsampling_neighbors": [1, 16, 8, 8, 4],
                "nthreads": 12,
                "training_receptive_fields_distribution_report_path": "*/training_eval/training_receptive_fields_distribution.log",
                "training_receptive_fields_distribution_plot_path": "*/training_eval/training_receptive_fields_distribution.svg",
                "training_receptive_fields_dir": null,
                "receptive_fields_distribution_report_path": "*/training_eval/receptive_fields_distribution.log",
                "receptive_fields_distribution_plot_path": "*/training_eval/receptive_fields_distribution.svg",
                "receptive_fields_dir": null,
                "training_support_points_report_path": "*/training_eval/training_support_points.las",
                "support_points_report_path": "*/training_eval/support_points.las"
            },
            "feature_extraction": {
                "type": "KPConv",
                "operations_per_depth": [2, 1, 1, 1, 1],
                "feature_space_dims": [64, 64, 128, 256, 512, 1024],
                "bn": true,
                "bn_momentum": 0.0,
                "activate": true,
                "sigma": [3.0, 3.0, 3.0, 3.0, 3.0, 3.0],
                "kernel_radius": [3.0, 3.0, 3.0, 3.0, 3.0, 3.0],
                "num_kernel_points": [15, 15, 15, 15, 15, 15],
                "deformable": [false, false, false, false, false, false],
                "W_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                "W_regularizer": [null, null, null, null, null, null],
                "W_constraint": [null, null, null, null, null, null],
                "unary_convolution_wrapper": {
                    "activation": "relu",
                    "initializer": "glorot_uniform",
                    "bn": true,
                    "bn_momentum": 0.98,
                    "feature_dim_divisor": 2
                }
            },
            "structure_alignment": null,
            "features_alignment": null,
            "downsampling_filter": "strided_kpconv",
            "upsampling_filter": "mean",
            "upsampling_bn": true,
            "upsampling_momentum": 0.0,
            "conv1d_kernel_initializer": "glorot_normal",
            "output_kernel_initializer": "glorot_normal",
            "model_handling": {
                "summary_report_path": "*/model_summary.log",
                "training_history_dir": "*/training_eval/history",
                "kpconv_representation_dir": "*/training_eval/kpconv_layers/",
                "skpconv_representation_dir": "*/training_eval/skpconv_layers/",
                "class_weight": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                "training_epochs": 300,
                "batch_size": 16,
                "checkpoint_path": "*/checkpoint.weights.h5",
                "checkpoint_monitor": "loss",
                "learning_rate_on_plateau": {
                    "monitor": "loss",
                    "mode": "min",
                    "factor": 0.1,
                    "patience": 2000,
                    "cooldown": 5,
                    "min_delta": 0.01,
                    "min_lr": 1e-6
                }
            },
            "compilation_args": {
                "optimizer": {
                    "algorithm": "SGD",
                    "learning_rate": {
                        "schedule": "exponential_decay",
                        "schedule_args": {
                            "initial_learning_rate": 1e-2,
                            "decay_steps": 15000,
                            "decay_rate": 0.96,
                            "staircase": false
                        }
                    }
                },
                "loss": {
                    "function": "class_weighted_categorical_crossentropy"
                },
                "metrics": [
                    "categorical_accuracy"
                ]
            },
            "architecture_graph_path": "*/model_graph.png",
            "architecture_graph_args": {
                "show_shapes": true,
                "show_dtype": true,
                "show_layer_names": true,
                "rankdir": "TB",
                "expand_nested": true,
                "dpi": 300,
                "show_layer_activations": true
            }
        },
        "autoval_metrics": ["OA", "P", "R", "F1", "IoU", "wP", "wR", "wF1", "wIoU", "MCC", "Kappa"],
        "training_evaluation_metrics": ["OA", "P", "R", "F1", "IoU", "wP", "wR", "wF1", "wIoU", "MCC", "Kappa"],
        "training_class_evaluation_metrics": ["P", "R", "F1", "IoU"],
        "training_evaluation_report_path": "*/training_eval/evaluation.log",
        "training_class_evaluation_report_path": "*/training_eval/class_evaluation.log",
        "training_confusion_matrix_report_path": "*/training_eval/confusion.log",
        "training_confusion_matrix_plot_path": "*/training_eval/confusion.svg",
        "training_class_distribution_report_path": "*/training_eval/class_distribution.log",
        "training_class_distribution_plot_path": "*/training_eval/class_distribution.svg",
        "training_classified_point_cloud_path": "*/training_eval/classified_point_cloud.las",
        "training_activations_path": null
    },
    {
      "writer": "PredictivePipelineWriter",
      "out_pipeline": "*pipe/KPC_T1.pipe",
      "include_writer": false,
      "include_imputer": false,
      "include_feature_transformer": false,
      "include_miner": false,
      "include_class_transformer": false
    }
  ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood and the KPConv operator for feature extraction. It is expected to work on a feature space with a column of ones (for feature-unbiased geometric features) and another of reflectances.

Arguments

– training_type: Typically it should be "base" for neural networks. For further details, read the training strategies section.

– fnames: The name of the features that must be given as input to the neural network. For hierarchical autoencoders this list can contain "ones" to specify whether to include a column of ones in the input space matrix. This architecture does not support empty feature spaces as input, thus, when no features are given, the input feature space must be represented with a column of ones. NOTE that, for technical reasons, the feature names should also be given inside the model_args dictionary.

– random_seed

Can be used to specify an integer-like seed for any randomness-based computation. Mostly to be used for reproducibility purposes. Note that the initialization of a neural network is often based on random distributions. This parameter does not affect those distributions, so it will not guarantee the reproducibility of deep learning models.

– model_args

The model specification.

– fnames: The feature names must be given again inside the model_args dictionary due to technical reasons.

– num_classes: An integer specifying the number of classes involved in the point-wise classification task.

– class_names: The names of the classes involved in the classification task. Each string corresponds to the class associated to its index in the list.

– pre_processing

How the select and fix stages of the deep learning strategy must be handled. Note that hierarchical autoencoders demand hierarchical receptive fields. See the receptive fields and hierarchical FPS receptive field sections for further details.

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "KPConv" is given below. For a description of the case when "type": "PointNet" see the PointNet operator documentation.

– operations_per_depth: A list specifying how many operations per depth level. The i-th element of the list gives the number of feature extraction operations at depth i.

– feature_space_dims: A list specifying the output dimensionality of the feature space after each feature extraction operation. The i-th element of the list gives the dimensionality of the i-th feature extraction operation.

– bn: Boolean flag to decide whether to enable batch normalization for feature extraction.

– bn_momentum: Momentum for the moving average of the batch normalization, such that new_mean = old_mean * momentum + batch_mean * (1 - momentum). See the Keras documentation on batch normalization for more details.

– activate: True to activate the output of the KPConv, False otherwise.

– sigma: The influence distance of the kernel points for each KPConv.

– kernel_radius: The radius of the ball where the kernel points belong for each KPConv.

– num_kernel_points: The number of points (i.e., structure space dimensionality) for each KPConv kernel.

– deformable: Whether the structure space of the KPConv will be optimized (True) or not (False), for each KPConv.
– W_initializer: The initialization method for the weights of each KPConv. See the keras documentation on initializers for more details.
– W_regularizer: The regularization strategy for weights of each KPConv. See the keras documentation on regularizers for more details.
– W_constraint: The constraints of the weights of each KPConv. See the keras documentation on constraints for more details.

– unary_convolution_wrapper

The specification of the unary convolutions (aka SharedMLPs) to be applied before the KPConv layer to halve the feature dimensionality and also after it to restore the original dimensionality.

– activation: The activation function for each unary convolution / SharedMLP. See the keras documentation on activations for more details.
– activate_postwrap: Whether to include an activation function after the unary convolution (after the batch normalization, if any).
– initializer: The initialization method for the point-wise unary convolutions (SharedMLPs). See the keras documentation on initializers for more details.
– bn: Whether to enable batch normalization (True) or not (False).
– bn_momentum: Momentum for the moving average of the batch normalization, such that new_mean = old_mean * momentum + batch_mean * (1 - momentum). See the Keras documentation on batch normalization for more details.
– postwrap_bn: Whether to include a batch normalization layer after the unary convolution.
– feature_dim_divisor: The divisor for the dimensionality in the unary convolution wrapper. The number of features will be divided by this number. The default is \(2\).

– structure_alignment

When given, this specification will govern the alignment of the structure space.

– tnet_pre_filters_spec: List defining the number of pre-transformation filters at each depth.
– tnet_post_filters_spec: List defining the number of post-transformation filters at each depth.
– kernel_initializer: The kernel initialization method for the structure alignment layers. See the keras documentation on initializers for more details.

– features_alignment: When given, this specification will govern the alignment of the feature space. It is like the structure_alignment dictionary but it is applied to the features instead of the structure space.
– downsampling_filter: The type of downsampling filter. See StridedKPConvLayer, FeaturesDownsamplingLayer, and InterdimensionalPointTransformerLayer for more details.
– upsampling_filter: The type of upsampling filter. See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn: Boolean flag to decide whether to enable batch normalization for upsampling transformations.

– upsampling_momentum: Momentum for the moving average of the upsampling batch normalization, such that new_mean = old_mean * momentum + batch_mean * (1-momentum). See the Keras documentation on batch normalization for more details.

– conv1d_kernel_initializer: The initialization method for the 1D convolutions during upsampling. See the keras documentation on initializers for more details.

– output_kernel_initializer: The initialization method for the final 1D convolution that computes the point-wise outputs of the neural network. See the keras documentation on initializers for more details.

– model_handling

Define how to handle the model, i.e., not the architecture itself but how it must be used. See the description of PointNet model handling for more details. The main differences for hierarchical autoencoders using KPConv are:

– kpconv_representation_dir: Path where the plots and CSV data representing the KPConv kernels will be stored.
– skpconv_representation_dir: Path where the plots and CSV data representing the strided KPConv kernels will be stored.

– compilation_args: The arguments governing the model’s compilation. They include the optimizer, the loss function and the metrics to be monitored during training. See the optimizers section and losses section for further details.
– training_evaluation_metrics: See PointNet-like point-wise classifier arguments.
– training_class_evaluation_metrics: See PointNet-like point-wise classifier arguments.
– training_evaluation_report_path: See PointNet-like point-wise classifier arguments.
– training_class_evaluation_report_path: See PointNet-like point-wise classifier arguments.
– training_confusion_matrix_report_path: See PointNet-like point-wise classifier arguments.
– training_confusion_matrix_plot_path: See PointNet-like point-wise classifier arguments.
– training_class_distribution_report_path: See PointNet-like point-wise classifier arguments.
– training_classified_point_cloud_path: See PointNet-like point-wise classifier arguments.
– training_activations_path: See PointNet-like point-wise classifier arguments.

Hierarchical feature extraction with SFL-NET

The ConvAutoencPwiseClassif architecture can be configured as a Slight Filter Learning Network (SFL-NET). This neural network for 3D point clouds was introduced in the SFL-NET paper (Li et al., 2023). It uses a simplified version of KPConv and replaces the shared MLPs with hourglasses in the upsampling and final layers. On top of that, it uses the hourglass layer to define a residual hourglass block that wraps each feature extraction layer at the different depths of the encoding hierarchy. The JSON below illustrates how to configure an SFL-NET-like hierarchical feature extractor using the VL3D framework.

{
    "in_pcloud": [
        "/oldext4/lidar_data/vl3dhack/data/dales/train/5080_54435.laz"
    ],
    "out_pcloud": [
        "/oldext4/lidar_data/vl3dhack/multiclass/out/DL_SFLNET/T1/*"
    ],
    "sequential_pipeline": [
        {
            "class_transformer": "ClassReducer",
            "on_predictions": false,
            "input_class_names": ["noclass", "ground", "vegetation", "cars", "trucks", "powerlines", "fences", "poles", "buildings"],
            "output_class_names": ["ground", "vegetation", "buildings", "powerlines", "objects", "noclass"],
            "class_groups": [["ground"], ["vegetation"], ["buildings"], ["powerlines"], ["cars", "trucks", "fences", "poles"], ["noclass"]],
            "report_path": "*class_reduction.log",
            "plot_path": "*class_reduction.svg"
        },
        {
            "train": "ConvolutionalAutoencoderPwiseClassifier",
            "training_type": "base",
            "fnames": ["ones"],
            "random_seed": null,
            "model_args": {
                "fnames": ["ones"],
                "num_classes": 6,
                "class_names": ["ground", "vegetation", "buildings", "powerlines", "objects", "noclass"],
                "pre_processing": {
                    "pre_processor": "hierarchical_fps",
                    "support_strategy_num_points": 200000,
                    "to_unit_sphere": false,
                    "support_strategy": "fps",
                    "support_chunk_size": 10000,
                    "support_strategy_fast": true,
                    "receptive_field_oversampling": {
                        "min_points": 2,
                        "strategy": "nearest",
                        "k": 3,
                        "radius": 0.5
                    },
                    "center_on_pcloud": true,
                    "neighborhood": {
                        "type": "sphere",
                        "radius": 6.0,
                        "separation_factor": 0.8
                    },
                    "num_points_per_depth": [256, 128, 64, 32, 16],
                    "fast_flag_per_depth": [false, false, false, false, false],
                    "num_downsampling_neighbors": [1, 16, 16, 16, 16],
                    "num_pwise_neighbors": [16, 16, 16, 16, 16],
                    "num_upsampling_neighbors": [1, 16, 16, 16, 16],
                    "nthreads": -1,
                    "training_receptive_fields_distribution_report_path": "*/training_eval/training_receptive_fields_distribution.log",
                    "training_receptive_fields_distribution_plot_path": "*/training_eval/training_receptive_fields_distribution.svg",
                    "training_receptive_fields_dir": "*/training_eval/training_rf/",
                    "receptive_fields_distribution_report_path": "*/training_eval/receptive_fields_distribution.log",
                    "receptive_fields_distribution_plot_path": "*/training_eval/receptive_fields_distribution.svg",
                    "_receptive_fields_dir": "*/training_eval/receptive_fields/",
                    "training_support_points_report_path": "*/training_eval/training_support_points.las",
                    "support_points_report_path": "*/training_eval/support_points.las"
                },
                "feature_extraction": {
                    "type": "LightKPConv",
                    "operations_per_depth": [2, 1, 1, 1, 1],
                    "feature_space_dims": [64, 64, 128, 256, 512, 1024],
                    "bn": true,
                    "bn_momentum": 0.98,
                    "activate": true,
                    "sigma": [6.0, 6.0, 7.5, 9.0, 10.5, 12.0],
                    "kernel_radius": [6.0, 6.0, 6.0, 6.0, 6.0, 6.0],
                    "num_kernel_points": [15, 15, 15, 15, 15, 15],
                    "deformable": [false, false, false, false, false, false],
                    "W_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "W_regularizer": [null, null, null, null, null, null],
                    "W_constraint": [null, null, null, null, null, null],
                    "A_trainable": [true, true, true, true, true ,true],
                    "A_regularizer": [null, null, null, null, null, null],
                    "A_constraint": [null, null, null, null, null, null],
                    "A_initializer": ["ones", "ones", "ones", "ones", "ones", "ones"],
                    "unary_convolution_wrapper": null,
                    "hourglass_wrapper": {
                        "internal_dim": [2, 2, 4, 16, 32, 64],
                        "parallel_internal_dim": [8, 8, 16, 32, 64, 128],
                        "activation": ["relu", "relu", "relu", "relu", "relu", "relu"],
                        "activation2": [null, null, null, null, null, null],
                        "regularize": [true, true, true, true, true, true],
                        "W1_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                        "W1_regularizer": [null, null, null, null, null, null],
                        "W1_constraint": [null, null, null, null, null, null],
                        "W2_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                        "W2_regularizer": [null, null, null, null, null, null],
                        "W2_constraint": [null, null, null, null, null, null],
                        "loss_factor": 0.1,
                        "subspace_factor": 0.125,
                        "feature_dim_divisor": 4,
                        "bn": false,
                        "bn_momentum": 0.98,
                        "out_bn": true,
                        "out_bn_momentum": 0.98,
                        "out_activation": "relu"
                    }
                },
                "features_alignment": null,
                "downsampling_filter": "strided_lightkpconv",
                "upsampling_filter": "mean",
                "upsampling_bn": true,
                "upsampling_momentum": 0.98,
                "upsampling_hourglass": {
                    "activation": "relu",
                    "activation2": null,
                    "regularize": true,
                    "W1_initializer": "glorot_uniform",
                    "W1_regularizer": null,
                    "W1_constraint": null,
                    "W2_initializer": "glorot_uniform",
                    "W2_regularizer": null,
                    "W2_constraint": null,
                    "loss_factor": 0.1,
                    "subspace_factor": 0.125
                },
                "conv1d": false,
                "conv1d_kernel_initializer": "glorot_normal",
                "output_kernel_initializer": "glorot_normal",
                "model_handling": {
                    "summary_report_path": "*/model_summary.log",
                    "training_history_dir": "*/training_eval/history",
                    "kpconv_representation_dir": "*/training_eval/kpconv_layers/",
                    "skpconv_representation_dir": "*/training_eval/skpconv_layers/",
                    "lkpconv_representation_dir": "*/training_eval/lkpconv_layers/",
                    "slkpconv_representation_dir": "*/training_eval/slkpconv_layers/",
                    "class_weight": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0],
                    "training_epochs": 300,
                    "batch_size": 64,
                    "training_sequencer": {
                        "type": "DLSequencer",
                        "random_shuffle_indices": true,
                        "augmentor": {
                            "transformations": [
                                    {
                                        "type": "Rotation",
                                        "axis": [0, 0, 1],
                                        "angle_distribution": {
                                            "type": "uniform",
                                            "start": -3.141592,
                                            "end": 3.141592
                                        }
                                    },
                                    {
                                        "type": "Scale",
                                        "scale_distribution": {
                                            "type": "uniform",
                                            "start": 0.99,
                                            "end": 1.01
                                        }
                                    },
                                    {
                                        "type": "Jitter",
                                        "noise_distribution": {
                                            "type": "normal",
                                            "mean": 0,
                                            "stdev": 0.001
                                        }
                                    }
                            ]
                        }
                    },
                    "prediction_reducer": {
                        "reduce_strategy" : {
                            "type": "MeanPredReduceStrategy"
                        },
                        "select_strategy": {
                            "type": "ArgMaxPredSelectStrategy"
                        }
                    },
                    "checkpoint_path": "*/checkpoint.weights.h5",
                    "checkpoint_monitor": "loss",
                    "learning_rate_on_plateau": {
                        "monitor": "loss",
                        "mode": "min",
                        "factor": 0.1,
                        "patience": 2000,
                        "cooldown": 5,
                        "min_delta": 0.01,
                        "min_lr": 1e-6
                    }
                },
                "compilation_args": {
                    "optimizer": {
                        "algorithm": "Adam",
                        "learning_rate": {
                            "schedule": "exponential_decay",
                            "schedule_args": {
                                "initial_learning_rate": 1e-2,
                                "decay_steps": 9000,
                                "decay_rate": 0.96,
                                "staircase": false
                            }
                        }
                    },
                    "loss": {
                        "function": "class_weighted_categorical_crossentropy"
                    },
                    "metrics": [
                        "categorical_accuracy"
                    ]
                },
                "architecture_graph_path": "*/model_graph.png",
                "architecture_graph_args": {
                    "show_shapes": true,
                    "show_dtype": true,
                    "show_layer_names": true,
                    "rankdir": "TB",
                    "expand_nested": true,
                    "dpi": 300,
                    "show_layer_activations": true
                }
            },
            "autoval_metrics": ["OA", "P", "R", "F1", "IoU", "wP", "wR", "wF1", "wIoU", "MCC", "Kappa"],
            "training_evaluation_metrics": ["OA", "P", "R", "F1", "IoU", "wP", "wR", "wF1", "wIoU", "MCC", "Kappa"],
            "training_class_evaluation_metrics": ["P", "R", "F1", "IoU"],
            "training_evaluation_report_path": "*/training_eval/evaluation.log",
            "training_class_evaluation_report_path": "*/training_eval/class_evaluation.log",
            "training_confusion_matrix_report_path": "*/training_eval/confusion.log",
            "training_confusion_matrix_plot_path": "*/training_eval/confusion.svg",
            "training_class_distribution_report_path": "*/training_eval/class_distribution.log",
            "training_class_distribution_plot_path": "*/training_eval/class_distribution.svg",
            "training_classified_point_cloud_path": "*/training_eval/classified_point_cloud.las",
            "training_activations_path": null
        },
        {
            "writer": "PredictivePipelineWriter",
            "out_pipeline": "*/model/SFLNET.pipe",
            "include_writer": false,
            "include_imputer": true,
            "include_feature_transformer": true,
            "include_miner": true,
            "include_class_transformer": false,
            "include_clustering": false,
            "ignore_predictions": false
        }
    ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood to prepare the input for an SFL-NET model. The subspace and loss factors are configured to \(\alpha=1/8\) and \(\beta=1/10\), as recommended in the SFL-NET paper (Li et al., 2023).

Arguments

– training_type

Typically it should be "base" for neural networks. For further details, read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "LightKPConv" and all the shared MLPs / unary convolutions are replaced by hourglass layers and hourglass residual blocks is given below. For a description of the case when "type": "KPConv" see the KPConv operator documentation. For a description of the general case "type": "LightKPConv" see the LightKPConv operator documentation.

– operations_per_depth: See KPConv arguments documentation.
– feature_space_dims: See KPConv arguments documentation.
– bn: See KPConv arguments documentation.
– bn_momentum: See KPConv arguments documentation.
– activate: See KPConv arguments documentation.
– sigma: See KPConv arguments documentation.
– kernel_radius: See KPConv arguments documentation.
– num_kernel_points: See KPConv arguments documentation.
– deformable: See KPConv arguments documentation.
– W_initializer: The initialization method for the weights of each light KPConv. See the keras documentation on initializers for more details.
– W_regularizer: The regularization strategy for the weights of each light KPConv. See the keras documentation on regularizers for more details.
– W_constraint: The constraints of the weights of each light KPConv. See the keras documentation on constraints for more details.
– unary_convolution_wrapper: To mimic an SFL-NET this specification must be set to null because SFL-NET uses a residual hourglass block instead of shared MLPs.

– hourglass_wrapper

The specification of how to use hourglass layers to wrap the feature extraction layers. To mimic an SFL-NET it is necessary to use an hourglass wrapper and avoid unary convolutions altogether.

– internal_dim

A list with the internal dimensions for the first transform in a HourglassLayer. NOTE that this value is ignored when a subspace factor \(\alpha\) is given.

– parallel_internal_dim

A list with the internal dimensions for the HourglassLayer in the residual block. NOTE that this value is ignored when a subspace factor \(\alpha\) is given.

– activation

The first activation function (i.e., \(\sigma_1\)) for each HourglassLayer. See the keras documentation on activations for more details.

– activation2

The second activation function (i.e., \(\sigma_2\)) for each HourglassLayer. See the keras documentation on activations for more details.

– activate_postwrap

Whether to include an activation function to finish the wrapping of the feature extractor operator.

– activate_residual

Whether to include an activation function to finish the residual block. Note that the standard practice is to avoid activation functions at the end of residual feature extraction blocks to keep them linear.

– regularize

Whether to regularize each HourglassLayer by adding \(\beta \mathcal{L}_h\) to the loss function (True) or not (False).

– spectral_strategy

Which strategy to use to compute the spectral norm. It can be either “unsafe” (fast but might break during training), “safe” (will work during training but can be up to twice as slow), or “approx” (as fast as unsafe but computing the approximated norm after applying a small Tikhonov regularization to prevent numerical issues, DEFAULT).

– W1_initializer

The initialization method for the first matrix of weights for each HourglassLayer. See the keras documentation on initializers for more details.

– W1_regularizer

The regularization strategy for the first matrix of weights for each HourglassLayer. See the keras documentation on regularizers for more details.

– W1_constraint

The constraint of the first matrix of weights for each HourglassLayer. See the keras documentation on constraints for more details.

– W2_initializer

The initialization method for the second matrix of weights for each HourglassLayer. See the keras documentation on initializers for more details.

– W2_regularizer

The regularization strategy for the second matrix of weights for each HourglassLayer. See the keras documentation on regularizers for more details.

– W2_constraint

The constraint of the second matrix of weights for each HourglassLayer. See the keras documentation on constraints for more details.

– loss_factor

The loss factor \(\beta\) for any HourglassLayer. It governs the impact of the extra term \(\beta \mathcal{L}_h\) in the loss function. NOTE that the loss factor will only be considered when regularize is set to True.

– subspace_factor

The subspace factor \(\alpha\) for any HourglassLayer. When given, the internal dimensionality \(D_h\) will be:

\[D_h = \alpha \; \max \; \left\{D_{\mathrm{in}}, D_{\mathrm{out}}\right\}\]

NOTE that when given, any specification of the internal dimensionalities will be replaced by the values derived by applying the subspace factor.

– feature_dim_divisor: The divisor to determine the output dimensionality of the pre-wrapper hourglass layer. The dimensionality will be calculated as \(D_{\text{in}} / \text{feature_dim_divisor}\).

– bn: Whether to include batch normalization to the main branch before merging with the residual block.
– bn_momentum: The momentum for the moving average of the batch normalization (as explained for PointNet++ bn_momentum specification).
– out_bn: Whether to include a batch normalization layer after the linear superposition of the residual block with the main branch (true) or not (false).
– merge_bn: Alias for out_bn. Note that if both are specified, out_bn has preference over merge_bn.
– out_bn_momentum: The momentum for the moving average of the batch normalization after the linear superposition of the residual block with the main branch (as explained for PointNet++ bn_momentum specification).
– out_activation: Whether to include an activation layer after the linear superposition (and after the batch normalization, if any) of the residual block with the main branch (true) or not (false).

– features_alignment

It must be null to mimic a SFL-NET model. See KPConv arguments documentation for further details.

– downsampling_filter

It must be configured to "strided_lightkpconv" (see StridedLightKPConvLayer) to mimic a SFL-NET model.

– upsampling_filter

The original upsampling strategy for KPConv and derived architectures is "nearest" (i.e., nearest upsampling). However, in VL3D++ examples we often use "mean" for our baseline models because we found it yields better results. See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn

– upsampling_momentum

– conv1d

Boolean flag governing whether to use unary convolutions (shared MLPs) to wrap the hourglass or not. SFL-NET models use hourglass layers instead of shared MLPs so it must be set to False when mimicking this model.

– conv1d_kernel_initializer

– output_kernel_initializer

– model_handling

See KPConv arguments documentation and LightKPConv arguments documentation.

– compilation_args

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_classified_point_cloud_path

– training_activations_path

Hierarchical feature extraction with LightKPConv

The ConvAutoencPwiseClassif architecture can be configured using a light-weight version of the KPConvLayer that for \(m_q\) kernel points uses only two matrices: 1) the weights \(\pmb{W} \in \mathbb{R}^{D_{\mathrm{in}} \times D_{\mathrm{out}}}\) and 2) the scale factors \(\pmb{A} \in \mathbb{R}^{m_q \times D_{\mathrm{in}}}\). Further details can be seen in the LightKPConvLayer documentation. The main difference with the classical KPConvLayer consists in updating the original equation:

\[\left(\pmb{P} * \mathcal{Q}\right) (\pmb{x}_{i*}) = \sum_{\pmb{x}_{j*} \in \mathcal{N}_{\pmb{x}_{i*}}}{ \Biggl[{ \sum_{k=1}^{m_q} \max \; \biggl\{ 0, 1 - \dfrac{ \lVert \pmb{x}_{j*} - \pmb{x}_{i*} - \pmb{q}_{k*} \rVert }{ \sigma } \biggr\} } \pmb{W}_{k}^\intercal \Biggr] \pmb{f}_{j*} }\]

to the light-weight version:

\[\left(\pmb{P} * \mathcal{Q} \right) (\pmb{x}_{i*}) = \sum_{\pmb{x}_{j*} \in \mathcal{N}_{\pmb{x}_{i*}}} \left(\operatorname{diag}\left[\sum_{k=1}^{m_q}{ \max \; \left\{ 0, 1 - \dfrac{ \lVert \pmb{x}_{j*} - \pmb{x}_{i*} - \pmb{q}_{k*} \rVert }{ \sigma } \right\} \pmb{a}_{k*} } \right] \pmb{W}\right)^{\intercal} \pmb{f}_{j*}\]

Note that, when all the shared MLPs are replaced by hourglass blocks, the LightKPConvLayer can be used in the context of a ConvAutoencPwiseClassif model to mimic the SFL-NET model as described in the hierarchical feature extraction with SFL-NET section. The rest of this section is devoted to describing the general usage of the LightKPConvLayer. The JSON below illustrates how to configure LightKPConv-based hierarchical feature extractors using the VL3D framework.

{
    "in_pcloud": [
        "/oldext4/lidar_data/vl3dhack/data/dales/train/5080_54435.laz"
    ],
    "out_pcloud": [
        "/oldext4/lidar_data/vl3dhack/multiclass/out/DL_LKPC/T1/*"
    ],
    "sequential_pipeline": [
        {
            "class_transformer": "ClassReducer",
            "on_predictions": false,
            "input_class_names": ["noclass", "ground", "vegetation", "cars", "trucks", "powerlines", "fences", "poles", "buildings"],
            "output_class_names": ["ground", "vegetation", "buildings", "powerlines", "objects", "noclass"],
            "class_groups": [["ground"], ["vegetation"], ["buildings"], ["powerlines"], ["cars", "trucks", "fences", "poles"], ["noclass"]],
            "report_path": "*class_reduction.log",
            "plot_path": "*class_reduction.svg"
        },
        {
            "train": "ConvolutionalAutoencoderPwiseClassifier",
            "training_type": "base",
            "fnames": ["ones"],
            "random_seed": null,
            "model_args": {
                "fnames": ["ones"],
                "num_classes": 6,
                "class_names": ["ground", "vegetation", "buildings", "powerlines", "objects", "noclass"],
                "pre_processing": {
                    "pre_processor": "hierarchical_fps",
                    "support_strategy_num_points": 200000,
                    "to_unit_sphere": false,
                    "support_strategy": "fps",
                    "support_chunk_size": 10000,
                    "support_strategy_fast": true,
                    "receptive_field_oversampling": {
                        "min_points": 2,
                        "strategy": "nearest",
                        "k": 3,
                        "radius": 0.5
                    },
                    "center_on_pcloud": true,
                    "neighborhood": {
                        "type": "sphere",
                        "radius": 6.0,
                        "separation_factor": 0.8
                    },
                    "num_points_per_depth": [256, 128, 64, 32, 16],
                    "fast_flag_per_depth": [false, false, false, false, false],
                    "num_downsampling_neighbors": [1, 16, 16, 16, 16],
                    "num_pwise_neighbors": [16, 16, 16, 16, 16],
                    "num_upsampling_neighbors": [1, 16, 16, 16, 16],
                    "nthreads": -1,
                    "training_receptive_fields_distribution_report_path": "*/training_eval/training_receptive_fields_distribution.log",
                    "training_receptive_fields_distribution_plot_path": "*/training_eval/training_receptive_fields_distribution.svg",
                    "training_receptive_fields_dir": "*/training_eval/training_rf/",
                    "receptive_fields_distribution_report_path": "*/training_eval/receptive_fields_distribution.log",
                    "receptive_fields_distribution_plot_path": "*/training_eval/receptive_fields_distribution.svg",
                    "_receptive_fields_dir": "*/training_eval/receptive_fields/",
                    "training_support_points_report_path": "*/training_eval/training_support_points.las",
                    "support_points_report_path": "*/training_eval/support_points.las"
                },
                "feature_extraction": {
                    "type": "LightKPConv",
                    "operations_per_depth": [2, 1, 1, 1, 1],
                    "feature_space_dims": [64, 64, 128, 256, 512, 1024],
                    "bn": true,
                    "bn_momentum": 0.98,
                    "activate": true,
                    "sigma": [6.0, 6.0, 7.5, 9.0, 10.5, 12.0],
                    "kernel_radius": [6.0, 6.0, 6.0, 6.0, 6.0, 6.0],
                    "num_kernel_points": [15, 15, 15, 15, 15, 15],
                    "deformable": [false, false, false, false, false, false],
                    "W_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "W_regularizer": [null, null, null, null, null, null],
                    "W_constraint": [null, null, null, null, null, null],
                    "A_trainable": [true, true, true, true, true, true],
                    "A_regularizer": [null, null, null, null, null, null],
                    "A_constraint": [null, null, null, null, null, null],
                    "A_initializer": ["ones", "ones", "ones", "ones", "ones", "ones"],
                    "_unary_convolution_wrapper": {
                        "activation": "relu",
                        "initializer": "glorot_uniform",
                        "bn": true,
                        "bn_momentum": 0.98,
                        "feature_dim_divisor": 2
                    },
                    "hourglass_wrapper": {
                        "internal_dim": [2, 2, 4, 16, 32, 64],
                        "parallel_internal_dim": [8, 8, 16, 32, 64, 128],
                        "activation": ["relu", "relu", "relu", "relu", "relu", "relu"],
                        "activation2": [null, null, null, null, null, null],
                        "regularize": [true, true, true, true, true, true],
                        "W1_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                        "W1_regularizer": [null, null, null, null, null, null],
                        "W1_constraint": [null, null, null, null, null, null],
                        "W2_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                        "W2_regularizer": [null, null, null, null, null, null],
                        "W2_constraint": [null, null, null, null, null, null],
                        "loss_factor": 0.1,
                        "subspace_factor": 0.125,
                        "feature_dim_divisor": 4,
                        "bn": false,
                        "bn_momentum": 0.98,
                        "out_bn": true,
                        "out_bn_momentum": 0.98,
                        "out_activation": "relu"
                    }
                },
                "features_alignment": null,
                "downsampling_filter": "strided_lightkpconv",
                "upsampling_filter": "mean",
                "upsampling_bn": true,
                "upsampling_momentum": 0.98,
                "_upsampling_hourglass": {
                    "activation": "relu",
                    "activation2": null,
                    "regularize": true,
                    "W1_initializer": "glorot_uniform",
                    "W1_regularizer": null,
                    "W1_constraint": null,
                    "W2_initializer": "glorot_uniform",
                    "W2_regularizer": null,
                    "W2_constraint": null,
                    "loss_factor": 0.1,
                    "subspace_factor": 0.125
                },
                "conv1d": true,
                "conv1d_kernel_initializer": "glorot_normal",
                "output_kernel_initializer": "glorot_normal",
                "model_handling": {
                    "summary_report_path": "*/model_summary.log",
                    "training_history_dir": "*/training_eval/history",
                    "_features_structuring_representation_dir": "*/training_eval/feat_struct_layer/",
                    "kpconv_representation_dir": "*/training_eval/kpconv_layers/",
                    "skpconv_representation_dir": "*/training_eval/skpconv_layers/",
                    "lkpconv_representation_dir": "*/training_eval/lkpconv_layers/",
                    "slkpconv_representation_dir": "*/training_eval/slkpconv_layers/",
                    "class_weight": [1.0, 1.0, 1.0, 1.0, 1.0, 0.0],
                    "training_epochs": 300,
                    "batch_size": 64,
                    "training_sequencer": {
                        "type": "DLSequencer",
                        "random_shuffle_indices": true,
                        "augmentor": {
                            "transformations": [
                                    {
                                        "type": "Rotation",
                                        "axis": [0, 0, 1],
                                        "angle_distribution": {
                                            "type": "uniform",
                                            "start": -3.141592,
                                            "end": 3.141592
                                        }
                                    },
                                    {
                                        "type": "Scale",
                                        "scale_distribution": {
                                            "type": "uniform",
                                            "start": 0.99,
                                            "end": 1.01
                                        }
                                    },
                                    {
                                        "type": "Jitter",
                                        "noise_distribution": {
                                            "type": "normal",
                                            "mean": 0,
                                            "stdev": 0.001
                                        }
                                    }
                            ]
                        }
                    },
                    "prediction_reducer": {
                        "reduce_strategy" : {
                            "type": "MeanPredReduceStrategy"
                        },
                        "select_strategy": {
                            "type": "ArgMaxPredSelectStrategy"
                        }
                    },
                    "checkpoint_path": "*/checkpoint.weights.h5",
                    "checkpoint_monitor": "loss",
                    "learning_rate_on_plateau": {
                        "monitor": "loss",
                        "mode": "min",
                        "factor": 0.1,
                        "patience": 2000,
                        "cooldown": 5,
                        "min_delta": 0.01,
                        "min_lr": 1e-6
                    }
                },
                "compilation_args": {
                    "optimizer": {
                        "algorithm": "Adam",
                        "learning_rate": {
                            "schedule": "exponential_decay",
                            "schedule_args": {
                                "initial_learning_rate": 1e-2,
                                "decay_steps": 9000,
                                "decay_rate": 0.96,
                                "staircase": false
                            }
                        }
                    },
                    "loss": {
                        "function": "class_weighted_categorical_crossentropy"
                    },
                    "metrics": [
                        "categorical_accuracy"
                    ]
                },
                "architecture_graph_path": "*/model_graph.png",
                "architecture_graph_args": {
                    "show_shapes": true,
                    "show_dtype": true,
                    "show_layer_names": true,
                    "rankdir": "TB",
                    "expand_nested": true,
                    "dpi": 300,
                    "show_layer_activations": true
                }
            },
            "autoval_metrics": ["OA", "P", "R", "F1", "IoU", "wP", "wR", "wF1", "wIoU", "MCC", "Kappa"],
            "training_evaluation_metrics": ["OA", "P", "R", "F1", "IoU", "wP", "wR", "wF1", "wIoU", "MCC", "Kappa"],
            "training_class_evaluation_metrics": ["P", "R", "F1", "IoU"],
            "training_evaluation_report_path": "*/training_eval/evaluation.log",
            "training_class_evaluation_report_path": "*/training_eval/class_evaluation.log",
            "training_confusion_matrix_report_path": "*/training_eval/confusion.log",
            "training_confusion_matrix_plot_path": "*/training_eval/confusion.svg",
            "training_class_distribution_report_path": "*/training_eval/class_distribution.log",
            "training_class_distribution_plot_path": "*/training_eval/class_distribution.svg",
            "training_classified_point_cloud_path": "*/training_eval/classified_point_cloud.las",
            "training_activations_path": null
        },
        {
            "writer": "PredictivePipelineWriter",
            "out_pipeline": "*/model/LKPConv.pipe",
            "include_writer": false,
            "include_imputer": true,
            "include_feature_transformer": true,
            "include_miner": true,
            "include_class_transformer": false,
            "include_clustering": false,
            "ignore_predictions": false
        }
    ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood to prepare the input for a LightKPConv-based model. It uses HourglassLayer and StridedLightKPConvLayer during the hierarchical encoding (similar to a SFL-NET model) and a FeaturesUpsamplingLayer with a mean reduction as well as shared MLPs (unary convolutions) during the hierarchical decoding.

Arguments

– training_type

Typically it should be "base" for neural networks. For further details, read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "LightKPConv" is given below. For a description of the case when "type": "PointNet" see the PointNet operator documentation, for the case "type": "KPConv" see the KPConv operator documentation, and to mimic a SFL-NET model see the SFL-NET documentation.

– operations_per_depth: See KPConv arguments documentation.
– feature_space_dims: See KPConv arguments documentation.
– bn: See KPConv arguments documentation.
– bn_momentum: See KPConv arguments documentation.
– activate: See KPConv arguments documentation.
– sigma: See KPConv arguments documentation.
– kernel_radius: See KPConv arguments documentation.
– num_kernel_points: See KPConv arguments documentation.
– deformable: See KPConv arguments documentation.
– W_initializer: The initialization method for the weights of each light KPConv. See the keras documentation on initializers for more details.
– W_regularizer: The regularization strategy for the weights of each light KPConv. See the keras documentation on regularizers for more details.
– W_constraint: The constraints of the weights of each light KPConv. See the keras documentation on constraints for more details.
– unary_convolution_wrapper: It can be used to configure a LightKPConv model that uses shared MLPs to wrap the feature extraction operators like a KPConv model or it can be set to null to use an hourglass_wrapper instead, similar to a SFL-NET model. See the KPConv arguments documentation for further details.
– hourglass_wrapper: The specification of how to use hourglass layers to wrap the feature extraction layers. See the SFL-NET arguments documentation for further details.

– features_alignment

– downsampling_filter

It can be configured to "strided_lightkpconv" (see StridedLightKPConvLayer) but it is also possible to use "strided_kpconv" to use the classical StridedKPConvLayer during downsampling. The FeaturesDownsamplingLayer and InterdimensionalPointTransformerLayer are also supported.

– upsampling_filter

The original upsampling strategy for KPConv and derived architectures is "nearest" (i.e., nearest upsampling). However, in VL3D++ examples we often use "mean" for our baseline models because we found it yields better results. See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn

– upsampling_momentum

– conv1d

Boolean flag governing whether to use unary convolutions (shared MLPs) to wrap the hourglass or not. SFL-NET models use hourglass layers instead (i.e., False), whereas classical KPConv models use shared MLPs (i.e., True).

– conv1d_kernel_initializer

– output_kernel_initializer

– model_handling

The model handling specification can be read in the KPConv arguments documentation. Here, only the special arguments for LightKPConv-based models are detailed:

– lkpconv_representation_dir: Path where the plots and CSV data representing the LightKPConv layers will be stored.
– slkpconv_representation_dir: Path where the plots and CSV data representing the strided LightKPConv layers will be stored.

– compilation_args

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_classified_point_cloud_path

– training_activations_path

Hierarchical feature extraction with PointTransformer

The ConvAutoencPwiseClassif architecture can be configured using PointTransformerLayer as the feature extraction strategy. Besides, the downsampling and upsampling operations can be carried out through InterdimensionalPointTransformerLayer. The PointTransformerLayer feature extractor can be summarized through the following equation

\[\pmb{\hat{f}}_{i*} = \sum_{\pmb{x}_{j*} \in \mathcal{N}(\pmb{x}_{i*})}{ \sigma\bigl( \gamma(\psi(\pmb{f}_{j*}) - \phi(\pmb{f}_{i*}) + \delta(\pmb{x}_{i*}, \pmb{x}_{j*})) \bigr) \odot \bigl( \alpha(\pmb{f}_{j*}) + \delta(\pmb{x}_{i*}, \pmb{x}_{j*}) \bigr) } ,\]

where the positional encoding \(\delta(\pmb{x}_{i*}, \pmb{x}_{j*})\) corresponds to

\[\delta(\pmb{x}_{i*}, \pmb{x}_{j*}) = \tilde{\sigma}_{\theta}\bigl( \sigma_{\theta}( (\pmb{x}_{j*} - \pmb{x}_{i*}) \pmb{\Theta} \oplus \pmb{\theta} ) \pmb{\widetilde{\Theta}} \oplus \pmb{\tilde{\theta}} \bigr) .\]

For further details about the variables see the PointTransformerLayer class documentation and the Point Transformer paper (Zhao et al., 2021).

The JSON below illustrates how to configure Point Transformer-based hierarchical feature extractors using the VL3D++ framework.

{
    "in_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/mined/Mar18_train_hsv_std.laz"
    ],
    "out_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/out/pttransf/T1/*"
    ],
    "sequential_pipeline": [
        {
            "train": "ConvolutionalAutoencoderPwiseClassifier",
            "training_type": "base",
            "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
            "random_seed": null,
            "model_args": {
                "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
                "num_classes": 11,
                "class_names": ["LowVeg", "ImpSurf", "Vehicle", "UrbanFurni", "Roof", "Facade", "Shrub", "Tree", "Soil/Gravel", "VertSurf", "Chimney"],
                "pre_processing": {
                    "pre_processor": "hierarchical_fpspp",
                    "support_strategy_num_points": 25000,
                    "to_unit_sphere": false,
                    "support_strategy": "fps",
                    "support_strategy_fast": 2,
                    "min_distance": 0.03,
                    "receptive_field_oversampling": {
                        "min_points": 2,
                        "strategy": "nearest",
                        "k": 3,
                        "radius": 0.5
                    },
                    "center_on_pcloud": true,
                    "training_class_distribution": [2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250],
                    "neighborhood": {
                        "type": "sphere",
                        "radius": 5.0,
                        "separation_factor": 0.8
                    },
                    "num_points_per_depth": [4096, 1024, 256, 64, 16],
                    "fast_flag_per_depth": [4, 4, false, false, false],
                    "num_downsampling_neighbors": [1, 16, 16, 16, 16],
                    "num_pwise_neighbors": [16, 16, 16, 16, 16],
                    "num_upsampling_neighbors": [1, 16, 16, 16, 16],
                    "nthreads": -1,
                    "training_receptive_fields_distribution_report_path": null,
                    "training_receptive_fields_distribution_plot_path": null,
                    "training_receptive_fields_dir": null,
                    "receptive_fields_distribution_report_path": null,
                    "receptive_fields_distribution_plot_path": null,
                    "receptive_fields_dir": null,
                    "training_support_points_report_path": null,
                    "support_points_report_path": null
                },
                "feature_extraction": {
                    "type": "PointTransformer",
                    "operations_per_depth": [2, 1, 1, 1, 1],
                    "feature_space_dims": [64, 64, 96, 128, 192, 256],
                    "bn": true,
                    "bn_momentum": 0.98,
                    "activate": true,
                    "Phi_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Phi_regularizer": [null, null, null, null, null, null],
                    "Phi_constraint": [null, null, null, null, null, null],
                    "Psi_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Psi_regularizer": [null, null, null, null, null, null],
                    "Psi_constraint": [null, null, null, null, null, null],
                    "A_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "A_regularizer": [null, null, null, null, null, null],
                    "A_constraint": [null, null, null, null, null, null],
                    "Gamma_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Gamma_regularizer": [null, null, null, null, null, null],
                    "Gamma_constraint": [null, null, null, null, null, null],
                    "Theta_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Theta_regularizer": [null, null, null, null, null, null],
                    "Theta_constraint": [null, null, null, null, null, null],
                    "ThetaTilde_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "ThetaTilde_regularizer": [null, null, null, null, null, null],
                    "ThetaTilde_constraint": [null, null, null, null, null, null],
                    "phi_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "phi_regularizer": [null, null, null, null, null, null],
                    "phi_constraint": [null, null, null, null, null, null],
                    "psi_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "psi_regularizer": [null, null, null, null, null, null],
                    "psi_constraint": [null, null, null, null, null, null],
                    "a_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "a_regularizer": [null, null, null, null, null, null],
                    "a_constraint": [null, null, null, null, null, null],
                    "gamma_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "gamma_regularizer": [null, null, null, null, null, null],
                    "gamma_constraint": [null, null, null, null, null, null],
                    "theta_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "theta_regularizer": [null, null, null, null, null, null],
                    "theta_constraint": [null, null, null, null, null, null],
                    "thetaTilde_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "thetaTilde_regularizer": [null, null, null, null, null, null],
                    "thetaTilde_constraint": [null, null, null, null, null, null],
                    "point_transformer_wrapper": {
                        "feature_dim_divisor": 2,
                        "residual": true,
                        "bn": true,
                        "postwrap_bn": true,
                        "merge_bn": false,
                        "bn_momentum": 0.98,
                        "activation": "relu",
                        "activate_postwrap": true,
                        "activate_residual": false,
                        "Phi_initializer": "glorot_uniform",
                        "Phi_regularizer": null,
                        "Phi_constraint": null,
                        "Psi_initializer": "glorot_uniform",
                        "Psi_regularizer": null,
                        "Psi_constraint": null,
                        "A_initializer": "glorot_uniform",
                        "A_regularizer": null,
                        "A_constraint": null,
                        "Gamma_initializer": "glorot_uniform",
                        "Gamma_regularizer": null,
                        "Gamma_constraint": null,
                        "Theta_initializer": "glorot_uniform",
                        "Theta_regularizer": null,
                        "Theta_constraint": null,
                        "ThetaTilde_initializer": "glorot_uniform",
                        "ThetaTilde_regularizer": null,
                        "ThetaTilde_constraint": null,
                        "phi_initializer": "glorot_uniform",
                        "phi_regularizer": null,
                        "phi_constraint": null,
                        "psi_initializer": "glorot_uniform",
                        "psi_regularizer": null,
                        "psi_constraint": null,
                        "a_initializer": "glorot_uniform",
                        "a_regularizer": null,
                        "a_constraint": null,
                        "gamma_initializer": "glorot_uniform",
                        "gamma_regularizer": null,
                        "gamma_constraint": null,
                        "theta_initializer": "glorot_uniform",
                        "theta_regularizer": null,
                        "theta_constraint": null,
                        "thetaTilde_initializer": "glorot_uniform",
                        "thetaTilde_regularizer": null,
                        "thetaTilde_constraint": null
                    }
                },
                "features_alignment": null,
                "downsampling_filter": "interdimensional_point_transformer",
                "upsampling_filter": "interdimensional_point_transformer",
                "upsampling_bn": true,
                "upsampling_momentum": 0.98,
                "conv1d": false,
                "conv1d_kernel_initializer": "glorot_normal",
                "output_kernel_initializer": "glorot_normal",
                "model_handling": {
                    "summary_report_path": "*/model_summary.log",
                    "training_history_dir": "*/training_eval/history",
                    "class_weight": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                    "training_epochs": 150,
                    "batch_size": 16,
                    "training_sequencer": {
                        "type": "DLSequencer",
                        "random_shuffle_indices": true,
                        "augmentor": {
                            "transformations": [
                                    {
                                        "type": "Rotation",
                                        "axis": [0, 0, 1],
                                        "angle_distribution": {
                                            "type": "uniform",
                                            "start": -3.141592,
                                            "end": 3.141592
                                        }
                                    },
                                    {
                                        "type": "Scale",
                                        "scale_distribution": {
                                            "type": "uniform",
                                            "start": 0.985,
                                            "end": 1.015
                                        }
                                    },
                                    {
                                        "type": "Jitter",
                                        "noise_distribution": {
                                            "type": "normal",
                                            "mean": 0,
                                            "stdev": 0.0033
                                        }
                                    }
                            ]
                        }
                    },
                    "prediction_reducer": {
                        "reduce_strategy" : {
                            "type": "MeanPredReduceStrategy"
                        },
                        "select_strategy": {
                            "type": "ArgMaxPredSelectStrategy"
                        }
                    },
                    "checkpoint_path": "*/checkpoint.weights.h5",
                    "checkpoint_monitor": "loss",
                    "learning_rate_on_plateau": {
                        "monitor": "loss",
                        "mode": "min",
                        "factor": 0.1,
                        "patience": 2000,
                        "cooldown": 5,
                        "min_delta": 0.01,
                        "min_lr": 1e-6
                    }
                },
                "compilation_args": {
                    "optimizer": {
                        "algorithm": "Adam",
                        "learning_rate": {
                            "schedule": "exponential_decay",
                            "schedule_args": {
                                "initial_learning_rate": 1e-2,
                                "decay_steps": 5000,
                                "decay_rate": 0.96,
                                "staircase": false
                            }
                        }
                    },
                    "loss": {
                        "function": "class_weighted_categorical_crossentropy"
                    },
                    "metrics": [
                        "categorical_accuracy"
                    ]
                },
                "architecture_graph_path": "*/model_graph.png",
                "architecture_graph_args": {
                    "show_shapes": true,
                    "show_dtype": true,
                    "show_layer_names": true,
                    "rankdir": "TB",
                    "expand_nested": true,
                    "dpi": 300,
                    "show_layer_activations": true
                }
            },
            "autoval_metrics": null,
            "training_evaluation_metrics": null,
            "training_class_evaluation_metrics": null,
            "training_evaluation_report_path": null,
            "training_class_evaluation_report_path": null,
            "training_confusion_matrix_report_path": null,
            "training_confusion_matrix_plot_path": null,
            "training_class_distribution_report_path": null,
            "training_class_distribution_plot_path": null,
            "training_classified_point_cloud_path": null,
            "training_activations_path": null
        },
        {
            "writer": "PredictivePipelineWriter",
            "out_pipeline": "*/model/PointTransformer.pipe",
            "include_writer": false,
            "include_imputer": true,
            "include_feature_transformer": true,
            "include_miner": true,
            "include_class_transformer": false,
            "include_clustering": false,
            "ignore_predictions": false
        }
    ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood to prepare the input for a PointTransformer-based model. It uses PointTransformerLayer for feature extraction and InterdimensionalPointTransformerLayer for downsampling and upsampling.

Arguments

– training_type

Typically it should be "base" for neural networks. For further details, read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "PointTransformer" is given below. For a description of the case when "type": "PointNet" see the PointNet operator documentation, for the case "type": "KPConv" see the KPConv operator documentation, to mimic a SFL-NET model see the SFL-NET documentation, and for the case "type": "LightKPConv" see the LightKPConv operator documentation.

– operations_per_depth: See KPConv arguments documentation.
– feature_space_dims: See KPConv arguments documentation.
– bn: See KPConv arguments documentation.
– bn_momentum: See KPConv arguments documentation.
– activate: See KPConv arguments documentation.

– Phi_initializer: The initialization method for the \(\pmb{\Phi}\) weights matrix of each PointTransformer. See the keras documentation on initializers for more details.

– Phi_regularizer: The regularization strategy for the \(\pmb{\Phi}\) weights matrix of each PointTransformer. See the keras documentation on regularizers for more details.

– Phi_constraint: The constraints of the \(\pmb{\Phi}\) weights matrix of each Point Transformer. See the keras documentation on constraints for more details.

– Psi_initializer: The initialization method for the \(\pmb{\Psi}\) weights matrix of each PointTransformer. See the keras documentation on initializers for more details.

– Psi_regularizer: The regularization strategy for the \(\pmb{\Psi}\) weights matrix of each PointTransformer. See the keras documentation on regularizers for more details.

– Psi_constraint: The constraints of the \(\pmb{\Psi}\) weights matrix of each Point Transformer. See the keras documentation on constraints for more details.

– A_initializer: The initialization method for the \(\pmb{A}\) weights matrix of each PointTransformer. See the keras documentation on initializers for more details.

– A_regularizer: The regularization strategy for the \(\pmb{A}\) weights matrix of each PointTransformer. See the keras documentation on regularizers for more details.

– A_constraint: The constraints of the \(\pmb{A}\) weights matrix of each Point Transformer. See the keras documentation on constraints for more details.

– Gamma_initializer: The initialization method for the \(\pmb{\Gamma}\) weights matrix of each PointTransformer. See the keras documentation on initializers for more details.

– Gamma_regularizer: The regularization strategy for the \(\pmb{\Gamma}\) weights matrix of each PointTransformer. See the keras documentation on regularizers for more details.

– Gamma_constraint: The constraints of the \(\pmb{\Gamma}\) weights matrix of each Point Transformer. See the keras documentation on constraints for more details.

– Theta_initializer: The initialization method for the \(\pmb{\Theta}\) weights matrix of each PointTransformer. See the keras documentation on initializers for more details.

– Theta_regularizer: The regularization strategy for the \(\pmb{\Theta}\) weights matrix of each PointTransformer. See the keras documentation on regularizers for more details.

– Theta_constraint: The constraints of the \(\pmb{\Theta}\) weights matrix of each Point Transformer. See the keras documentation on constraints for more details.

– ThetaTilde_initializer: The initialization method for the \(\pmb{\widetilde{\Theta}}\) weights matrix of each PointTransformer. See the keras documentation on initializers for more details.

– ThetaTilde_regularizer: The regularization strategy for the \(\pmb{\widetilde{\Theta}}\) weights matrix of each PointTransformer. See the keras documentation on regularizers for more details.

– ThetaTilde_constraint: The constraints of the \(\pmb{\widetilde{\Theta}}\) weights matrix of each Point Transformer. See the keras documentation on constraints for more details.

– phi_initializer: The initialization method for the \(\pmb{\phi}\) weights vector of each PointTransformer. See the keras documentation on initializers for more details.

– phi_regularizer: The regularization strategy for the \(\pmb{\phi}\) weights vector of each PointTransformer. See the keras documentation on regularizers for more details.

– phi_constraint: The constraints of the \(\pmb{\phi}\) weights vector of each Point Transformer. See the keras documentation on constraints for more details.

– psi_initializer: The initialization method for the \(\pmb{\psi}\) weights vector of each PointTransformer. See the keras documentation on initializers for more details.

– psi_regularizer: The regularization strategy for the \(\pmb{\psi}\) weights vector of each PointTransformer. See the keras documentation on regularizers for more details.

– psi_constraint: The constraints of the \(\pmb{\psi}\) weights vector of each Point Transformer. See the keras documentation on constraints for more details.

– a_initializer: The initialization method for the \(\pmb{a}\) weights vector of each PointTransformer. See the keras documentation on initializers for more details.

– a_regularizer: The regularization strategy for the \(\pmb{a}\) weights vector of each PointTransformer. See the keras documentation on regularizers for more details.

– a_constraint: The constraints of the \(\pmb{a}\) weights vector of each Point Transformer. See the keras documentation on constraints for more details.

– gamma_initializer: The initialization method for the \(\pmb{\gamma}\) weights vector of each PointTransformer. See the keras documentation on initializers for more details.

– gamma_regularizer: The regularization strategy for the \(\pmb{\gamma}\) weights vector of each PointTransformer. See the keras documentation on regularizers for more details.

– gamma_constraint: The constraints of the \(\pmb{\gamma}\) weights vector of each Point Transformer. See the keras documentation on constraints for more details.

– theta_initializer: The initialization method for the \(\pmb{\theta}\) weights vector of each PointTransformer. See the keras documentation on initializers for more details.

– theta_regularizer: The regularization strategy for the \(\pmb{\theta}\) weights vector of each PointTransformer. See the keras documentation on regularizers for more details.

– theta_constraint: The constraints of the \(\pmb{\theta}\) weights vector of each Point Transformer. See the keras documentation on constraints for more details.

– thetaTilde_initializer: The initialization method for the \(\pmb{\tilde{\theta}}\) weights vector of each PointTransformer. See the keras documentation on initializers for more details.

– thetaTilde_regularizer: The regularization strategy for the \(\pmb{\tilde{\theta}}\) weights vector of each PointTransformer. See the keras documentation on regularizers for more details.

– thetaTilde_constraint: The constraints of the \(\pmb{\tilde{\theta}}\) weights vector of each Point Transformer. See the keras documentation on constraints for more details.
– unary_convolution_wrapper: It can be used to configure a LightKPConv model that uses shared MLPs to wrap the feature extraction operators, like a KPConv model, or it can be set to null to use an hourglass_wrapper instead, similar to a SFL-NET model. See the KPConv arguments documentation for further details.
– hourglass_wrapper: The specification of how to use hourglass layers to wrap the feature extraction layers. See the SFL-NET arguments documentation for further details.

– point_transformer_wrapper

The specification of how to use Point Transformer layers to wrap the feature extraction layers (with or without a residual block).

– feature_dim_divisor: See SFL-NET hourglass documentation on feature_dim_divisor.
– residual: Whether to include another PointTransformerLayer in a residual branch. Default is false.
– bn: See SFL-NET hourglass documentation on batch normalization.
– postwrap_bn: Whether to include a batch normalization layer after the feature extractor but before merging with the parallel branch.
– merge_bn: Whether to include a batch normalization layer after the linear superposition of the residual block with the main branch (true) or not (false).
– bn_momentum: The momentum for the moving average of the batch normalization (as explained for PointNet++ bn_momentum specification).
– activation: The activation function for the wrapper and residual point transformers. See the keras documentation on activations for more details.
– activate_postwrap: Whether to include an activation function after the point transformer (after the batch normalization, if any) but before merging with the residual parallel branch.
– activate_residual: Whether to activate the parallel branch after the feature extraction (and the batch normalization, if any). Note that when using parallel branches as residual blocks the typical approach is to avoid activation to keep it linear.
– Phi_initializer: See the Phi initializer documentation.
– Phi_regularizer: See the Phi regularizer documentation.
– Phi_constraint: See the Phi constraint documentation.
– Psi_initializer: See the Psi initializer documentation.
– Psi_regularizer: See the Psi regularizer documentation.
– Psi_constraint: See the Psi constraint documentation.
– Gamma_initializer: See the Gamma initializer documentation.
– Gamma_regularizer: See the Gamma regularizer documentation.
– Gamma_constraint: See the Gamma constraint documentation.
– A_initializer: See the A initializer documentation.
– A_regularizer: See the A regularizer documentation.
– A_constraint: See the A constraint documentation.
– Theta_initializer: See the Theta initializer documentation.
– Theta_regularizer: See the Theta regularizer documentation.
– Theta_constraint: See the Theta constraint documentation.
– ThetaTilde_initializer: See the ThetaTilde initializer documentation.
– ThetaTilde_regularizer: See the ThetaTilde regularizer documentation.
– ThetaTilde_constraint: See the ThetaTilde constraint documentation.
– phi_initializer: See the phi initializer documentation.
– phi_regularizer: See the phi regularizer documentation.
– phi_constraint: See the phi constraint documentation.
– psi_initializer: See the psi initializer documentation.
– psi_regularizer: See the psi regularizer documentation.
– psi_constraint: See the psi constraint documentation.
– gamma_initializer: See the gamma initializer documentation.
– gamma_regularizer: See the gamma regularizer documentation.
– gamma_constraint: See the gamma constraint documentation.
– a_initializer: See the a initializer documentation.
– a_regularizer: See the a regularizer documentation.
– a_constraint: See the a constraint documentation.
– theta_initializer: See the theta initializer documentation.
– theta_regularizer: See the theta regularizer documentation.
– theta_constraint: See the theta constraint documentation.
– thetaTilde_initializer: See the thetaTilde initializer documentation.
– thetaTilde_regularizer: See the thetaTilde regularizer documentation.
– thetaTilde_constraint: See the thetaTilde constraint documentation.

– features_alignment

– downsampling_filter

It can be set to "strided_lightkpconv" (see StridedLightKPConvLayer) or to "strided_kpconv", which applies the classical StridedKPConvLayer during downsampling. The FeaturesDownsamplingLayer and InterdimensionalPointTransformerLayer are also supported; the example above uses "interdimensional_point_transformer".

– upsampling_filter

The original upsampling strategy for KPConv and derived architectures is "nearest" (i.e., nearest upsampling). However, in VL3D++ examples we often use "mean" for our baseline models because we found it yields better results. See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn

– upsampling_momentum

– conv1d

Boolean flag governing whether to use unary convolutions (shared MLPs) to wrap the hourglass or not. SFL-NET models use hourglass layers instead (i.e., false), whereas classical KPConv models use shared MLPs (i.e., true).

– conv1d_kernel_initializer

– output_kernel_initializer

– model_handling: The model handling specification can be read in the KPConv arguments documentation.
– compilation_args: See KPConv arguments documentation.

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_classified_point_cloud_path

– training_activations_path

Hierarchical feature extraction with GroupedPointTransformer

The ConvAutoencPwiseClassif architecture can be configured using GroupedPointTransformerLayer as the feature extraction strategy. For further details about the variables, see the GroupedPointTransformerLayer class documentation and the Point Transformer v2 paper about Grouped Vector Attention (Wu et al., 2022).

The JSON below illustrates how to configure Grouped Point Transformer-based hierarchical feature extractors using the VL3D++ framework.

{
    "in_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/mined/Mar18_train_hsv_std.laz"
    ],
    "out_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/out/gpttransf_alt/T1/*"
    ],
    "sequential_pipeline": [
        {
            "train": "ConvolutionalAutoencoderPwiseClassifier",
            "training_type": "base",
            "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
            "random_seed": null,
            "model_args": {
                "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
                "num_classes": 11,
                "class_names": ["LowVeg", "ImpSurf", "Vehicle", "UrbanFurni", "Roof", "Facade", "Shrub", "Tree", "Soil/Gravel", "VertSurf", "Chimney"],
                "pre_processing": {
                    "pre_processor": "hierarchical_fpspp",
                    "support_strategy_num_points": 25000,
                    "to_unit_sphere": false,
                    "support_strategy": "fps",
                    "support_strategy_fast": 2,
                    "min_distance": 0.03,
                    "receptive_field_oversampling": {
                        "min_points": 2,
                        "strategy": "nearest",
                        "k": 3,
                        "radius": 0.5
                    },
                    "center_on_pcloud": true,
                    "training_class_distribution": [2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250],
                    "neighborhood": {
                        "type": "sphere",
                        "radius": 5.0,
                        "separation_factor": 0.8
                    },
                    "num_points_per_depth": [4096, 1024, 256, 64, 16],
                    "fast_flag_per_depth": [4, 4, false, false, false],
                    "num_downsampling_neighbors": [1, 16, 16, 16, 16],
                    "num_pwise_neighbors": [16, 16, 16, 16, 16],
                    "num_upsampling_neighbors": [1, 16, 16, 16, 16],
                    "nthreads": -1,
                    "training_receptive_fields_distribution_report_path": null,
                    "training_receptive_fields_distribution_plot_path": null,
                    "training_receptive_fields_dir": null,
                    "receptive_fields_distribution_report_path": null,
                    "receptive_fields_distribution_plot_path": null,
                    "receptive_fields_dir": null,
                    "training_support_points_report_path": null,
                    "support_points_report_path": null
                },
                "feature_extraction": {
                    "type": "GroupedPointTransformer",
                    "operations_per_depth": [2, 1, 1, 1, 1],
                    "feature_space_dims": [64, 64, 96, 128, 192, 256],
                    "init_ftransf_bn": true,
                    "init_ftransf_bn_momentum": 0.98,
                    "groups": [8, 8, 12, 16, 24, 32],
                    "dropout_rate": [0.25, 0.25, 0.25, 0.25, 0.25, 0.25],
                    "bn": false,
                    "bn_momentum": 0.98,
                    "activate": false,
                    "Q_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Q_regularizer": [null, null, null, null, null, null],
                    "Q_constraint": [null, null, null, null, null, null],
                    "Q_bn_momentum": [0.98, 0.98, 0.98, 0.98, 0.98, 0.98],
                    "q_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "q_regularizer": [null, null, null, null, null, null],
                    "q_constraint": [null, null, null, null, null, null],
                    "K_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "K_regularizer": [null, null, null, null, null, null],
                    "K_constraint": [null, null, null, null, null, null],
                    "K_bn_momentum": [0.98, 0.98, 0.98, 0.98, 0.98, 0.98],
                    "k_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "k_regularizer": [null, null, null, null, null, null],
                    "k_constraint": [null, null, null, null, null, null],
                    "V_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "V_regularizer": [null, null, null, null, null, null],
                    "V_constraint": [null, null, null, null, null, null],
                    "v_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "v_regularizer": [null, null, null, null, null, null],
                    "v_constraint": [null, null, null, null, null, null],
                    "ThetaA_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "ThetaA_regularizer": [null, null, null, null, null, null],
                    "ThetaA_constraint": [null, null, null, null, null, null],
                    "thetaA_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "thetaA_regularizer": [null, null, null, null, null, null],
                    "thetaA_constraint": [null, null, null, null, null, null],
                    "ThetaTildeA_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "ThetaTildeA_regularizer": [null, null, null, null, null, null],
                    "ThetaTildeA_constraint": [null, null, null, null, null, null],
                    "thetaTildeA_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "thetaTildeA_regularizer": [null, null, null, null, null, null],
                    "thetaTildeA_constraint": [null, null, null, null, null, null],
                    "deltaA_bn_momentum": [0.98, 0.98, 0.98, 0.98, 0.98, 0.98],
                    "ThetaB_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "ThetaB_regularizer": [null, null, null, null, null, null],
                    "ThetaB_constraint": [null, null, null, null, null, null],
                    "thetaB_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "thetaB_regularizer": [null, null, null, null, null, null],
                    "thetaB_constraint": [null, null, null, null, null, null],
                    "ThetaTildeB_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "ThetaTildeB_regularizer": [null, null, null, null, null, null],
                    "ThetaTildeB_constraint": [null, null, null, null, null, null],
                    "thetaTildeB_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "thetaTildeB_regularizer": [null, null, null, null, null, null],
                    "thetaTildeB_constraint": [null, null, null, null, null, null],
                    "deltaB_bn_momentum": [0.98, 0.98, 0.98, 0.98, 0.98, 0.98],
                    "Omega_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Omega_regularizer": [null, null, null, null, null, null],
                    "Omega_constraint": [null, null, null, null, null, null],
                    "omega_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "omega_regularizer": [null, null, null, null, null, null],
                    "omega_constraint": [null, null, null, null, null, null],
                    "omega_bn_momentum": [0.98, 0.98, 0.98, 0.98, 0.98, 0.98],
                    "OmegaTilde_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "OmegaTilde_regularizer": [null, null, null, null, null, null],
                    "OmegaTilde_constraint": [null, null, null, null, null, null],
                    "omegaTilde_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "omegaTilde_regularizer": [null, null, null, null, null, null],
                    "omegaTilde_constraint": [null, null, null, null, null, null],
                    "hourglass_wrapper": {
                        "internal_dim": [2, 2, 4, 16, 32, 64],
                        "parallel_internal_dim": [8, 8, 16, 32, 64, 128],
                        "activation": ["relu", "relu", "relu", "relu", "relu", "relu"],
                        "activation2": [null, null, null, null, null, null],
                        "activate_postwrap": true,
                        "activate_residual": false,
                        "regularize": [true, true, true, true, true, true],
                        "W1_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                        "W1_regularizer": [null, null, null, null, null, null],
                        "W1_constraint": [null, null, null, null, null, null],
                        "W2_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                        "W2_regularizer": [null, null, null, null, null, null],
                        "W2_constraint": [null, null, null, null, null, null],
                        "loss_factor": 0.1,
                        "subspace_factor": 0.125,
                        "feature_dim_divisor": 4,
                        "bn": false,
                        "merge_bn": false,
                        "bn_momentum": 0.98,
                        "out_bn": true,
                        "out_bn_momentum": 0.98,
                        "out_activation": "relu"
                    }
                },
                "features_alignment": null,
                "downsampling_filter": "mean",
                "upsampling_filter": "mean",
                "upsampling_bn": true,
                "upsampling_momentum": 0.98,
                "conv1d": false,
                "conv1d_kernel_initializer": "glorot_normal",
                "output_kernel_initializer": "glorot_normal",
                "model_handling": {
                    "summary_report_path": "*/model_summary.log",
                    "training_history_dir": "*/training_eval/history",
                    "class_weight": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                    "training_epochs": 200,
                    "batch_size": 32,
                    "training_sequencer": {
                        "type": "DLSequencer",
                        "random_shuffle_indices": true,
                        "augmentor": {
                            "transformations": [
                                    {
                                        "type": "Rotation",
                                        "axis": [0, 0, 1],
                                        "angle_distribution": {
                                            "type": "uniform",
                                            "start": -3.141592,
                                            "end": 3.141592
                                        }
                                    },
                                    {
                                        "type": "Scale",
                                        "scale_distribution": {
                                            "type": "uniform",
                                            "start": 0.985,
                                            "end": 1.015
                                        }
                                    },
                                    {
                                        "type": "Jitter",
                                        "noise_distribution": {
                                            "type": "normal",
                                            "mean": 0,
                                            "stdev": 0.0033
                                        }
                                    }
                            ]
                        }
                    },
                    "prediction_reducer": {
                        "reduce_strategy" : {
                            "type": "MeanPredReduceStrategy"
                        },
                        "select_strategy": {
                            "type": "ArgMaxPredSelectStrategy"
                        }
                    },
                    "checkpoint_path": "*/checkpoint.weights.h5",
                    "checkpoint_monitor": "loss",
                    "learning_rate_on_plateau": {
                        "monitor": "loss",
                        "mode": "min",
                        "factor": 0.1,
                        "patience": 2000,
                        "cooldown": 5,
                        "min_delta": 0.01,
                        "min_lr": 1e-6
                    }
                },
                "compilation_args": {
                    "optimizer": {
                        "algorithm": "Adam",
                        "learning_rate": {
                            "schedule": "exponential_decay",
                            "schedule_args": {
                                "initial_learning_rate": 1e-2,
                                "decay_steps": 1000,
                                "decay_rate": 0.96,
                                "staircase": false
                            }
                        }
                    },
                    "loss": {
                        "function": "class_weighted_categorical_crossentropy"
                    },
                    "metrics": [
                        "categorical_accuracy"
                    ]
                },
                "architecture_graph_path": "*/model_graph.png",
                "architecture_graph_args": {
                    "show_shapes": true,
                    "show_dtype": true,
                    "show_layer_names": true,
                    "rankdir": "TB",
                    "expand_nested": true,
                    "dpi": 300,
                    "show_layer_activations": true
                }
            },
            "autoval_metrics": null,
            "training_evaluation_metrics": null,
            "training_class_evaluation_metrics": null,
            "training_evaluation_report_path": null,
            "training_class_evaluation_report_path": null,
            "training_confusion_matrix_report_path": null,
            "training_confusion_matrix_plot_path": null,
            "training_class_distribution_report_path": null,
            "training_class_distribution_plot_path": null,
            "training_classified_point_cloud_path": null,
            "training_activations_path": null
        },
        {
            "writer": "PredictivePipelineWriter",
            "out_pipeline": "*/model/PointTransformer.pipe",
            "include_writer": false,
            "include_imputer": true,
            "include_feature_transformer": true,
            "include_miner": true,
            "include_class_transformer": false,
            "include_clustering": false,
            "ignore_predictions": false
        }
    ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood to prepare the input for a GroupedPointTransformer-based model. It uses GroupedPointTransformerLayer for feature extraction, FeaturesDownsamplingLayer for downsampling with a mean filter, and, analogously, FeaturesUpsamplingLayer for mean-based upsampling.

Arguments

– training_type

Typically it should be "base" for neural networks. For further details, read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "GroupedPointTransformer" is given below. For a description of the case when "type": "PointNet" see the PointNet operator documentation, for the case "type": "KPConv" see the KPConv operator documentation, to mimic a SFL-NET model see the SFL-NET documentation, for the case "type": "LightKPConv" see the LightKPConv operator documentation, and to mimic a PointTransformer model see the PointTransformer documentation.

– operations_per_depth: See KPConv arguments documentation.
– feature_space_dims: See KPConv arguments documentation.
– bn: See KPConv arguments documentation.
– bn_momentum: See KPConv arguments documentation.
– activate: See KPConv arguments documentation.

– init_ftransf_bn: The batch normalization for the feature transform before the grouped point transformer-based feature extraction. It can be enabled with True or disabled with False. Note that it is applied also before any wrapper block (if any).

– init_ftransf_bn_momentum: The momentum governing how to update the standardization parameters for the batch normalization before the grouped point transformer-based feature extraction. See the Hierarchical PointNet bn_momentum documentation for further details.

– groups: The number of groups at each depth. Note that it must be a divisor of the number of channels at that depth.

– dropout_rate: The ratio in \([0, 1]\) governing how many weight encoding units must be randomly disabled during training.

– Q_initializer: The initialization method for the \(\pmb{Q}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– Q_regularizer: The regularization strategy for the \(\pmb{Q}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– Q_constraint: The constraints of the \(\pmb{Q}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– q_initializer: The initialization method for the \(\pmb{q}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– q_regularizer: The regularization method for the \(\pmb{q}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– q_constraint: The constraint method for the \(\pmb{q}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– K_initializer: The initialization method for the \(\pmb{K}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– K_regularizer: The regularization strategy for the \(\pmb{K}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– K_constraint: The constraints of the \(\pmb{K}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– k_initializer: The initialization method for the \(\pmb{k}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– k_regularizer: The regularization method for the \(\pmb{k}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– k_constraint: The constraint method for the \(\pmb{k}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– V_initializer: The initialization method for the \(\pmb{V}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– V_regularizer: The regularization strategy for the \(\pmb{V}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– V_constraint: The constraints of the \(\pmb{V}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– v_initializer: The initialization method for the \(\pmb{v}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– v_regularizer: The regularization method for the \(\pmb{v}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– v_constraint: The constraint method for the \(\pmb{v}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– ThetaA_initializer: The initialization method for the \(\pmb{\Theta_A}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– ThetaA_regularizer: The regularization strategy for the \(\pmb{\Theta_A}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– ThetaA_constraint: The constraints of the \(\pmb{\Theta_A}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– thetaA_initializer: The initialization method for the \(\pmb{\theta_A}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– thetaA_regularizer: The regularization method for the \(\pmb{\theta_A}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– thetaA_constraint: The constraint method for the \(\pmb{\theta_A}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– ThetaTildeA_initializer: The initialization method for the \(\pmb{\widetilde{\Theta}_A}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– ThetaTildeA_regularizer: The regularization strategy for the \(\pmb{\widetilde{\Theta}_A}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– ThetaTildeA_constraint: The constraints of the \(\pmb{\widetilde{\Theta}_A}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– thetaTildeA_initializer: The initialization method for the \(\pmb{\tilde{\theta}_A}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– thetaTildeA_regularizer: The regularization method for the \(\pmb{\tilde{\theta}_A}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– thetaTildeA_constraint: The constraint method for the \(\pmb{\tilde{\theta}_A}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– deltaA_bn_momentum: The momentum for the batch normalization of the multiplier positional encoding. See the Hierarchical PointNet bn_momentum documentation for further details.

– ThetaB_initializer: The initialization method for the \(\pmb{\Theta_B}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– ThetaB_regularizer: The regularization strategy for the \(\pmb{\Theta_B}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– ThetaB_constraint: The constraints of the \(\pmb{\Theta_B}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– thetaB_initializer: The initialization method for the \(\pmb{\theta_B}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– thetaB_regularizer: The regularization method for the \(\pmb{\theta_B}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– thetaB_constraint: The constraint method for the \(\pmb{\theta_B}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– ThetaTildeB_initializer: The initialization method for the \(\pmb{\widetilde{\Theta}_B}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– ThetaTildeB_regularizer: The regularization strategy for the \(\pmb{\widetilde{\Theta}_B}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– ThetaTildeB_constraint: The constraints of the \(\pmb{\widetilde{\Theta}_B}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– thetaTildeB_initializer: The initialization method for the \(\pmb{\tilde{\theta}_B}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– thetaTildeB_regularizer: The regularization method for the \(\pmb{\tilde{\theta}_B}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– thetaTildeB_constraint: The constraint method for the \(\pmb{\tilde{\theta}_B}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– deltaB_bn_momentum: The momentum for the batch normalization of the bias positional encoding. See the Hierarchical PointNet bn_momentum documentation for further details.

– Omega_initializer: The initialization method for the \(\pmb{\Omega}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– Omega_regularizer: The regularization strategy for the \(\pmb{\Omega}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– Omega_constraint: The constraints of the \(\pmb{\Omega}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– omega_initializer: The initialization method for the \(\pmb{\omega}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– omega_regularizer: The regularization method for the \(\pmb{\omega}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– omega_constraint: The constraint method for the \(\pmb{\omega}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– OmegaTilde_initializer: The initialization method for the \(\pmb{\widetilde{\Omega}}\) weights matrix of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– OmegaTilde_regularizer: The regularization strategy for the \(\pmb{\widetilde{\Omega}}\) weights matrix of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– OmegaTilde_constraint: The constraints of the \(\pmb{\widetilde{\Omega}}\) weights matrix of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– omegaTilde_initializer: The initialization method for the \(\pmb{\tilde{\omega}}\) weights vector of each GroupedPointTransformer. See the keras documentation on initializers for more details.

– omegaTilde_regularizer: The regularization method for the \(\pmb{\tilde{\omega}}\) weights vector of each GroupedPointTransformer. See the keras documentation on regularizers for more details.

– omegaTilde_constraint: The constraint method for the \(\pmb{\tilde{\omega}}\) weights vector of each GroupedPointTransformer. See the keras documentation on constraints for more details.

– omega_bn_momentum: The momentum for the batch normalization of the weight encoding. See the Hierarchical PointNet bn_momentum documentation for further details.
– unary_convolution_wrapper: It can be used to configure a LightKPConv model that uses shared MLPs to wrap the feature extraction operators like a KPConv model, or it can be set to null to use an hourglass_wrapper instead, similar to a SFL-NET model. See the KPConv arguments documentation for further details.
– hourglass_wrapper: The specification of how to use hourglass layers to wrap the feature extraction layers. See the SFL-NET arguments documentation for further details.
– point_transformer_wrapper: The specification of how to use Point Transformer layers to wrap the feature extraction layers (with or without a residual block). See the PointTransformer arguments documentation.

– features_alignment

– downsampling_filter

It can be configured to "strided_lightkpconv" (see StridedLightKPConvLayer) but it is also possible to use "strided_kpconv" to use the classical StridedKPConvLayer during downsampling. The FeaturesDownsamplingLayer and InterdimensionalPointTransformerLayer are also supported.

– upsampling_filter

The original upsampling strategy for KPConv and derived architectures is "nearest" (i.e., nearest upsampling). However, in VL3D++ examples we often use "mean" for our baseline models because we found it yields better results. See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn

– upsampling_momentum

– conv1d

Boolean flag governing whether to use unary convolutions (shared MLPs) to wrap the hourglass or not. SFL-NET models use hourglass layers instead (i.e., False), classical KPConv models use shared MLPs instead (i.e., True).

– conv1d_kernel_initializer

– output_kernel_initializer

– model_handling: The model handling specification can be read in the KPConv arguments documentation.
– compilation_args: See KPConv arguments documentation.

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_classified_point_cloud_path

– training_activations_path

Hierarchical feature extraction with PointMLP

The ConvAutoencPwiseClassif architecture can be configured using PointMLPLayer as the feature extraction strategy. For further details about the variables see the PointMLPLayer class documentation and the PointMLP paper (Xu Ma et al., 2022).

The JSON below illustrates how to configure PointMLP-based hierarchical feature extractors using the VL3D++ framework.

{
    "in_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/mined/Mar18_train_hsv_std.laz"
    ],
    "out_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/out/pointmlp_dumean_neck_ctxhead/T1/*"
    ],
    "sequential_pipeline": [
        {
            "train": "ConvolutionalAutoencoderPwiseClassifier",
            "training_type": "base",
            "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
            "random_seed": null,
            "model_args": {
                "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
                "num_classes": 11,
                "class_names": ["LowVeg", "ImpSurf", "Vehicle", "UrbanFurni", "Roof", "Facade", "Shrub", "Tree", "Soil/Gravel", "VertSurf", "Chimney"],
                "pre_processing": {
                    "pre_processor": "hierarchical_fpspp",
                    "support_strategy_num_points": 25000,
                    "to_unit_sphere": false,
                    "support_strategy": "fps",
                    "support_strategy_fast": 2,
                    "min_distance": 0.03,
                    "receptive_field_oversampling": {
                        "min_points": 2,
                        "strategy": "nearest",
                        "k": 3,
                        "radius": 0.5
                    },
                    "center_on_pcloud": true,
                    "training_class_distribution": [2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250],
                    "neighborhood": {
                        "type": "sphere",
                        "radius": 5.0,
                        "separation_factor": 0.8
                    },
                    "num_points_per_depth": [4096, 1024, 256, 64, 16],
                    "fast_flag_per_depth": [4, 4, false, false, false],
                    "num_downsampling_neighbors": [1, 16, 16, 16, 16],
                    "num_pwise_neighbors": [16, 16, 16, 16, 16],
                    "num_upsampling_neighbors": [1, 16, 16, 16, 16],
                    "nthreads": -1,
                    "training_receptive_fields_distribution_report_path": null,
                    "training_receptive_fields_distribution_plot_path": null,
                    "training_receptive_fields_dir": null,
                    "receptive_fields_distribution_report_path": null,
                    "receptive_fields_distribution_plot_path": null,
                    "receptive_fields_dir": null,
                    "training_support_points_report_path": null,
                    "support_points_report_path": null
                },
                "feature_extraction": {
                    "type": "PointMLP",
                    "operations_per_depth": [2, 1, 1, 1, 1],
                    "feature_space_dims": [64, 64, 96, 128, 192, 256],
                    "bn": true,
                    "bn_momentum": 0.90,
                    "activate": true,
                    "groups": [4, 4, 4, 4, 4, 4],
                    "Phi_blocks": [2, 2, 2, 2, 2, 2],
                    "Phi_residual_expansion": [2, 2, 2, 2, 2, 2],
                    "Phi_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Phi_regularizer": [null, null, null, null, null, null],
                    "Phi_constraint": [null, null, null, null, null, null],
                    "Phi_bn": [true, true, true, true, true, true],
                    "Phi_bn_momentum": [0.90, 0.90, 0.90, 0.90, 0.90, 0.90],
                    "Psi_blocks": [2, 2, 2, 2, 2, 2],
                    "Psi_residual_expansion": [2, 2, 2, 2, 2, 2],
                    "Psi_initializer": ["glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform", "glorot_uniform"],
                    "Psi_regularizer": [null, null, null, null, null, null],
                    "Psi_constraint": [null, null, null, null, null, null],
                    "Psi_bn": [true, true, true, true, true, true],
                    "Psi_bn_momentum": [0.90, 0.90, 0.90, 0.90, 0.90, 0.90]
                },
                "features_alignment": null,
                "downsampling_filter": "mean",
                "upsampling_filter": "mean",
                "upsampling_bn": true,
                "upsampling_momentum": 0.90,
                "conv1d": true,
                "conv1d_kernel_initializer": "glorot_normal",
                "neck":{
                    "max_depth": 2,
                    "hidden_channels": [64, 64],
                    "kernel_initializer": ["glorot_uniform", "glorot_uniform"],
                    "kernel_regularizer": [null, null],
                    "kernel_constraint": [null, null],
                    "bn_momentum": [0.90, 0.90],
                    "activation": ["relu", "relu"]
                },
                "output_kernel_initializer": "glorot_normal",
                "contextual_head": {
                    "max_depth": 2,
                    "hidden_channels": [64, 64],
                    "output_channels": [64, 64],
                    "bn": [true, true],
                    "bn_momentum": [0.90, 0.90],
                    "bn_along_neighbors": [true, true],
                    "activation": ["relu", "relu"],
                    "distance": ["euclidean", "euclidean"],
                    "ascending_order": [true, true],
                    "aggregation": ["max", "max"],
                    "initializer": ["glorot_uniform", "glorot_uniform"],
                    "regularizer": [null, null],
                    "constraint": [null, null]
                },
                "model_handling": {
                    "summary_report_path": "*/model_summary.log",
                    "training_history_dir": "*/training_eval/history",
                    "class_weight": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                    "training_epochs": 200,
                    "batch_size": 16,
                    "training_sequencer": {
                        "type": "DLSequencer",
                        "random_shuffle_indices": true,
                        "augmentor": {
                            "transformations": [
                                    {
                                        "type": "Rotation",
                                        "axis": [0, 0, 1],
                                        "angle_distribution": {
                                            "type": "uniform",
                                            "start": -3.141592,
                                            "end": 3.141592
                                        }
                                    },
                                    {
                                        "type": "Scale",
                                        "scale_distribution": {
                                            "type": "uniform",
                                            "start": 0.985,
                                            "end": 1.015
                                        }
                                    },
                                    {
                                        "type": "Jitter",
                                        "noise_distribution": {
                                            "type": "normal",
                                            "mean": 0,
                                            "stdev": 0.0033
                                        }
                                    }
                            ]
                        }
                    },
                    "prediction_reducer": {
                        "reduce_strategy" : {
                            "type": "MeanPredReduceStrategy"
                        },
                        "select_strategy": {
                            "type": "ArgMaxPredSelectStrategy"
                        }
                    },
                    "checkpoint_path": "*/checkpoint.weights.h5",
                    "checkpoint_monitor": "loss",
                    "learning_rate_on_plateau": {
                        "monitor": "loss",
                        "mode": "min",
                        "factor": 0.1,
                        "patience": 2000,
                        "cooldown": 5,
                        "min_delta": 0.01,
                        "min_lr": 1e-6
                    }
                },
                "compilation_args": {
                    "optimizer": {
                        "algorithm": "Adam",
                        "learning_rate": {
                            "schedule": "exponential_decay",
                            "schedule_args": {
                                "initial_learning_rate": 1e-2,
                                "decay_steps": 2250,
                                "decay_rate": 0.96,
                                "staircase": false
                            }
                        }
                    },
                    "loss": {
                        "function": "class_weighted_categorical_crossentropy"
                    },
                    "metrics": [
                        "categorical_accuracy"
                    ]
                },
                "architecture_graph_path": "*/model_graph.png",
                "architecture_graph_args": {
                    "show_shapes": true,
                    "show_dtype": false,
                    "show_layer_names": true,
                    "rankdir": "LR",
                    "expand_nested": false,
                    "dpi": 200,
                    "show_layer_activations": false
                }
            },
            "autoval_metrics": null,
            "training_evaluation_metrics": null,
            "training_class_evaluation_metrics": null,
            "training_evaluation_report_path": null,
            "training_class_evaluation_report_path": null,
            "training_confusion_matrix_report_path": null,
            "training_confusion_matrix_plot_path": null,
            "training_class_distribution_report_path": null,
            "training_class_distribution_plot_path": null,
            "training_classified_point_cloud_path": null,
            "training_activations_path": null
        },
        {
            "writer": "PredictivePipelineWriter",
            "out_pipeline": "*/model/PointMLP.pipe",
            "include_writer": false,
            "include_imputer": true,
            "include_feature_transformer": true,
            "include_miner": true,
            "include_class_transformer": false,
            "include_clustering": false,
            "ignore_predictions": false
        }
    ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood to prepare the input for a PointMLP-based model. It uses PointMLPLayer for feature extraction, FeaturesDownsamplingLayer for downsampling with a mean filter, FeaturesUpsamplingLayer for analogous mean-based upsampling, a neck before the head, and a contextual head after the standard segmentation head based on ContextualPointLayer.

Arguments

– training_type

Typically it should be "base" for neural networks. For further details, read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "PointMLP" is given below. For a description of the case when "type": "PointNet" see the PointNet operator documentation, for the case "type": "KPConv" see the KPConv operator documentation, to mimic a SFL-NET model see the SFL-NET documentation, for the case "type": "LightKPConv" see the LightKPConv operator documentation, to mimic a PointTransformer model see the PointTransformer documentation, and to mimic a GroupedPointTransformer model see the GroupedPointTransformer documentation.

– operations_per_depth: See KPConv arguments documentation.
– feature_space_dims: See KPConv arguments documentation.
– bn: See KPConv arguments documentation.
– bn_momentum: See KPConv arguments documentation.
– activate: See KPConv arguments documentation.
– groups: The number of groups into which the features are divided at each depth. Note that it must divide both the number of input and output features.
– Phi_blocks: The number of blocks for the residual shared MLPs at each depth.
– Phi_residual_expansion: The factor multiplying the number of output features in the internal representations at each depth.
– Phi_initializer: The initialization method for the weights of the \(\Phi\) shared MLPs at each depth. See the keras documentation on initializers for more details.
– Phi_regularizer: The regularization method for the weights of the \(\Phi\) shared MLPs at each depth. See the keras documentation on regularizers for more details.
– Phi_constraint: The constraint for the weights of the \(\Phi\) shared MLPs at each depth. See the keras documentation on constraints for more details.
– Phi_bn: Whether to enable the batch normalization for the \(\Phi\) shared MLPs at each depth.
– Phi_bn_momentum: The momentum for the batch normalization of the \(\Phi\) shared MLPs. See the Hierarchical PointNet bn_momentum documentation for further details.
– Psi_blocks: The number of blocks for the final residual shared MLPs \(\Psi\).
– Psi_residual_expansion: The factor multiplying the number of output features in the internal representations of the final residual shared MLPs at each depth.
– Psi_initializer: The initialization method for the weights of the \(\Psi\) shared MLPs at each depth. See the keras documentation on initializers for more details.
– Psi_regularizer: The regularization method for the weights of the \(\Psi\) shared MLPs at each depth. See the keras documentation on regularizers for more details.
– Psi_constraint: The constraint for the weights of the \(\Psi\) shared MLPs at each depth. See the keras documentation on constraints for more details.
– Psi_bn: Whether to enable the batch normalization for the \(\Psi\) shared MLPs at each depth.
– Psi_bn_momentum: The momentum for the batch normalization of the \(\Psi\) shared MLPs. See the Hierarchical PointNet bn_momentum documentation for further details.

– features_alignment

– downsampling_filter

The type of downsampling filter. See StridedKPConvLayer, StridedLightKPConvLayer, FeaturesDownsamplingLayer, and InterdimensionalPointTransformerLayer for more details.

– upsampling_filter

The type of upsampling filter. See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn

Boolean flag to decide whether to enable batch normalization for upsampling transformations.

– upsampling_momentum

Momentum for the moving average of the upsampling batch normalization, such that new_mean = old_mean * momentum + batch_mean * (1-momentum). See the Keras documentation on batch normalization for more details.

– conv1d

Boolean flag governing whether to use unary convolutions (shared MLPs) to wrap the hourglass or not. SFL-NET models use hourglass layers instead (i.e., False), whereas classical KPConv models use shared MLPs (i.e., True).

– conv1d_kernel_initializer

The initialization method for the 1D convolutions during upsampling. See the keras documentation on initializers for more details.

– neck

See the neck block documentation.

– output_kernel_initializer

– contextual_head

The specification of the contextual head as specified in the contextual head documentation.

– model_handling

Define how to handle the model, i.e., not the architecture itself but how it must be used. See the description of PointNet model handling for more details.

– compilation_args

– architecture_graph_path

– architecture_graph_args

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_class_distribution_plot_path

– training_classified_point_cloud_path

– training_activations_path

Hierarchical feature extraction with KPConvX

The ConvAutoencPwiseClassif architecture can be configured using KPConvXLayer as the feature extraction strategy. For further details about the variables see the KPConvXLayer class documentation and the KPConvX paper (Thomas et al., 2024).

The JSON below illustrates how to configure KPConvX-based hierarchical feature extractors using the VL3D++ framework.

{
    "in_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/mined/Mar18_train_hsv_std.laz"
    ],
    "out_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/out/kpconvx_dumean_neck_full_droppath/T1/*"
    ],
    "sequential_pipeline": [
        {
            "train": "ConvolutionalAutoencoderPwiseClassifier",
            "training_type": "base",
            "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
            "random_seed": null,
            "model_args": {
                "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
                "num_classes": 11,
                "class_names": ["LowVeg", "ImpSurf", "Vehicle", "UrbanFurni", "Roof", "Facade", "Shrub", "Tree", "Soil/Gravel", "VertSurf", "Chimney"],
                "pre_processing": {
                    "pre_processor": "hierarchical_fpspp",
                    "support_strategy_num_points": 25000,
                    "to_unit_sphere": false,
                    "support_strategy": "fps",
                    "support_strategy_fast": 2,
                    "min_distance": 0.03,
                    "receptive_field_oversampling": {
                        "min_points": 2,
                        "strategy": "nearest",
                        "k": 3,
                        "radius": 0.5
                    },
                    "center_on_pcloud": true,
                    "training_class_distribution": [2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250],
                    "neighborhood": {
                        "type": "sphere",
                        "radius": 5.0,
                        "separation_factor": 0.8
                    },
                    "num_points_per_depth": [2048, 512, 256, 128, 32],
                    "fast_flag_per_depth": [4, 4, false, false, false],
                    "num_downsampling_neighbors": [1, 12, 16, 20, 20],
                    "num_pwise_neighbors": [12, 16, 20, 20, 20],
                    "num_upsampling_neighbors": [1, 12, 16, 20, 20],
                    "nthreads": -1,
                    "training_receptive_fields_distribution_report_path": null,
                    "training_receptive_fields_distribution_plot_path": null,
                    "training_receptive_fields_dir": null,
                    "receptive_fields_distribution_report_path": null,
                    "receptive_fields_distribution_plot_path": null,
                    "receptive_fields_dir": null,
                    "training_support_points_report_path": null,
                    "support_points_report_path": null
                },
                "feature_extraction": {
                    "type": "KPConvX",
                    "kpconv":{
                        "feature_space_dims": 64,
                        "sigma": 5.0,
                        "kernel_radius": 5.0,
                        "num_kernel_points": 17,
                        "deformable": false,
                        "W_initializer": "he_uniform",
                        "W_regularizer": null,
                        "W_constraint": null,
                        "bn": true,
                        "bn_momentum": 0.90,
                        "activate": true
                    },
                    "operations_per_depth": [1, 1, 1, 1, 1],
                    "drop_path": 0.33,
                    "blocks": [3, 3, 9, 12, 3],
                    "feature_space_dims": [64, 96, 128, 192, 256],
                    "hidden_feature_space_dims": [256, 384, 512, 768, 1024],
                    "sigma": [5.0, 5.0, 5.0, 5.0, 5.0],
                    "shell_radii": [[0, 2.5, 5.0], [0, 2.5, 5.0], [0, 2.5, 5.0], [0, 2.5, 5.0], [0, 2.5, 5.0]],
                    "shell_points": [[1, 14, 28], [1, 14, 28], [1, 14, 28], [1, 14, 28], [1, 14, 28]],
                    "bn": [true, true, true, true, true],
                    "bn_momentum": [0.90, 0.90, 0.90, 0.90, 0.90],
                    "activate": [true, true, true, true, true],
                    "groups": [8, 8, 8, 8, 8],
                    "deformable": [false, false, false, false, false],
                    "initializer": ["he_uniform", "he_uniform", "he_uniform", "he_uniform", "he_uniform"],
                    "regularizer": [null, null, null, null, null],
                    "constraint": [null, null, null, null, null]
                },
                "features_alignment": null,
                "downsampling_filter": "mean",
                "upsampling_filter": "mean",
                "upsampling_bn": true,
                "upsampling_momentum": 0.90,
                "conv1d": false,
                "conv1d_kernel_initializer": "he_uniform",
                "upsampling_kpconvx": {
                    "drop_path": 0.33,
                    "blocks": [1, 1, 1, 1],
                    "hidden_feature_space_dims": [256, 384, 512, 768],
                    "sigma": [5.0, 5.0, 5.0, 5.0],
                    "shell_radii": [[0, 2.5, 5.0], [0, 2.5, 5.0], [0, 2.5, 5.0], [0, 2.5, 5.0]],
                    "shell_points": [[1, 14, 28], [1, 14, 28], [1, 14, 28], [1, 14, 28]],
                    "bn_momentum": [0.90, 0.90, 0.90, 0.90],
                    "activate": [true, true, true, true],
                    "groups": [8, 8, 8, 8],
                    "deformable": [false, false, false, false],
                    "initializer": ["he_uniform", "he_uniform", "he_uniform", "he_uniform"],
                    "regularizer": [null, null, null, null],
                    "constraint": [null, null, null, null]
                },
                "neck":{
                    "max_depth": 2,
                    "hidden_channels": [64, 64],
                    "kernel_initializer": ["he_uniform", "he_uniform"],
                    "kernel_regularizer": [null, null],
                    "kernel_constraint": [null, null],
                    "bn_momentum": [0.90, 0.90],
                    "activation": ["relu", "relu"]
                },
                "output_kernel_initializer": "he_normal",
                "model_handling": {
                    "summary_report_path": "*/model_summary.log",
                    "training_history_dir": "*/training_eval/history",
                    "kpconvx_representation_dir": "*/training_eval/kpconvx_layers/",
                    "class_weight": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                    "training_epochs": 200,
                    "batch_size": 24,
                    "training_sequencer": {
                        "type": "DLSequencer",
                        "random_shuffle_indices": true,
                        "augmentor": {
                            "transformations": [
                                    {
                                        "type": "Rotation",
                                        "axis": [0, 0, 1],
                                        "angle_distribution": {
                                            "type": "uniform",
                                            "start": -3.141592,
                                            "end": 3.141592
                                        }
                                    },
                                    {
                                        "type": "Scale",
                                        "scale_distribution": {
                                            "type": "uniform",
                                            "start": 0.985,
                                            "end": 1.015
                                        }
                                    },
                                    {
                                        "type": "Jitter",
                                        "noise_distribution": {
                                            "type": "normal",
                                            "mean": 0,
                                            "stdev": 0.0033
                                        }
                                    }
                            ]
                        }
                    },
                    "prediction_reducer": {
                        "reduce_strategy" : {
                            "type": "MeanPredReduceStrategy"
                        },
                        "select_strategy": {
                            "type": "ArgMaxPredSelectStrategy"
                        }
                    },
                    "checkpoint_path": "*/checkpoint.weights.h5",
                    "checkpoint_monitor": "loss",
                    "learning_rate_on_plateau": {
                        "monitor": "loss",
                        "mode": "min",
                        "factor": 0.1,
                        "patience": 2000,
                        "cooldown": 5,
                        "min_delta": 0.01,
                        "min_lr": 1e-6
                    }
                },
                "compilation_args": {
                    "optimizer": {
                        "algorithm": "AdamW",
                        "learning_rate": {
                            "schedule": "exponential_decay",
                            "schedule_args": {
                                "initial_learning_rate": 1e-2,
                                "decay_steps": 3333,
                                "decay_rate": 0.96,
                                "staircase": false
                            }
                        }
                    },
                    "loss": {
                        "function": "class_weighted_categorical_crossentropy"
                    },
                    "metrics": [
                        "categorical_accuracy",
                        "f1"
                    ]
                },
                "architecture_graph_path": "*/model_graph.png",
                "architecture_graph_args": {
                    "show_shapes": true,
                    "show_dtype": true,
                    "show_layer_names": true,
                    "rankdir": "TB",
                    "expand_nested": true,
                    "dpi": 300,
                    "show_layer_activations": true
                }
            },
            "autoval_metrics": null,
            "training_evaluation_metrics": null,
            "training_class_evaluation_metrics": null,
            "training_evaluation_report_path": null,
            "training_class_evaluation_report_path": null,
            "training_confusion_matrix_report_path": null,
            "training_confusion_matrix_plot_path": null,
            "training_class_distribution_report_path": null,
            "training_class_distribution_plot_path": null,
            "training_classified_point_cloud_path": null,
            "training_activations_path": null
        },
        {
            "writer": "PredictivePipelineWriter",
            "out_pipeline": "*/model/KPConvX.pipe",
            "include_writer": false,
            "include_imputer": true,
            "include_feature_transformer": true,
            "include_miner": true,
            "include_class_transformer": false,
            "include_clustering": false,
            "ignore_predictions": false
        }
    ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood to prepare the input for a KPConvX-based model. It uses KPConvLayer for the initial feature extraction stage, KPConvXLayer with many blocks for encoding feature extraction stages, and a single-block KPConvXLayer for decoding feature extraction stages.

Arguments

– training_type

Typically it should be "base" for neural networks. For further details read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "KPConvX" is given below. For a description of the case when "type": "PointNet" see the PointNet operator documentation, for the case "type": "KPConv" see the KPConv operator documentation, to mimic a SFL-NET model see the SFL-NET documentation, for the case "type": "LightKPConv" see the LightKPConv operator documentation, to mimic a PointTransformer model see the PointTransformer documentation, and to mimic a GroupedPointTransformer model see the GroupedPointTransformer documentation.

– kpconv

The specification for the initial KPConvLayer feature extractor.

– feature_space_dims: See KPConv arguments documentation.
– sigma: See KPConv arguments documentation.
– kernel_radius: See KPConv arguments documentation.
– num_kernel_points: See KPConv arguments documentation.
– deformable: See KPConv arguments documentation.
– W_initializer: The initialization method for the weights of the initial KPConv. See the keras documentation on initializers for more details.
– W_regularizer: The regularization strategy for weights of the initial KPConv. See the keras documentation on regularizers for more details.
– W_constraint: The constraints of the weights of the initial KPConv. See the keras documentation on constraints for more details.
– bn: See KPConv arguments documentation.
– bn_momentum: See KPConv arguments documentation.
– activate: See KPConv arguments documentation.

– operations_per_depth

How many KPConvXLayer layers must be placed at each depth of the encoding hierarchy. Note that, contrary to other feature extractors, it is recommended to put exactly one operation per depth and tweak the number of blocks per depth to increase or reduce the depth of each feature extractor.

– drop_path: The probability to ignore (only during training) a block from KPConvXLayer layers. Note that \(0\) means no drop path at all while \(1\) implies dropping all blocks.

– blocks: A list with the number of blocks for each KPConvXLayer at each encoding depth.

– feature_space_dims: See KPConv arguments documentation.

– hidden_feature_space_dims: A list specifying the hidden dimensionality of the feature space at each depth.

– sigma: The influence distance of the kernel points for each KPConvX.

– shell_radii: The radius for each spherical shell composing the structure space (aka support points) of each kernel.

– shell_points: The number of points for each spherical shell composing the structure space (aka support points) of each kernel.

– bn: Whether to enable batch normalization (True) or not (False).

– bn_momentum: Momentum for the moving average of the batch normalization, such that new_mean = old_mean * momentum + batch_mean * (1 - momentum). See the Keras documentation on batch normalization for more details.

– activate: True to activate the output of the KPConvX, False otherwise.

– groups: The number of groups for the input channels. Note that it must divide the dimensionality of the input feature space.

– deformable: Whether the structure space of the KPConvX will be optimized (True) or not (False), for each KPConvX.

– initializer: The initialization method for the weights of each KPConvX. See the keras documentation on initializers for more details.

– regularizer: The regularization strategy for weights of each KPConvX. See the keras documentation on regularizers for more details.

– constraint: The constraints of the weights of each KPConvX. See the keras documentation on constraints for more details.

– features_alignment

– downsampling_filter

It can be configured to "strided_lightkpconv" (see StridedLightKPConvLayer) but it is also possible to use "strided_kpconv" to use the classical StridedKPConvLayer during downsampling. The FeaturesDownsamplingLayer and InterdimensionalPointTransformerLayer are also supported.

– upsampling_filter

See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn

– upsampling_momentum

– conv1d

Boolean flag governing whether to use unary convolutions (shared MLPs) to wrap the hourglass or not.

– conv1d_kernel_initializer

– upsampling_kpconvx

The upsampling KPConvXLayer at each depth. Note that it can be null to avoid using KPConvX as decoding feature extractor. Also, the number of upsampling KPConvX layers is the number of encoding KPConvX layers minus one.

– drop_path: See KPConvX arguments documentation.
– blocks: See KPConvX arguments documentation. Note that for the decoder the recommended number of blocks is one.
– hidden_feature_space_dims: See KPConvX arguments documentation.
– sigma: See KPConvX arguments documentation.
– shell_radii: See KPConvX arguments documentation.
– shell_points: See KPConvX arguments documentation.
– bn_momentum: See KPConvX arguments documentation.
– activate: See KPConvX arguments documentation.
– groups: See KPConvX arguments documentation.
– deformable: See KPConvX arguments documentation.
– initializer: See KPConvX arguments documentation.
– regularizer: See KPConvX arguments documentation.
– constraint: See KPConvX arguments documentation.

– neck

See the neck block documentation.

– output_kernel_initializer

– contextual_head

The specification of the contextual head as specified in the contextual head documentation.

– model_handling

Define how to handle the model, i.e., not the architecture itself but how it must be used. See the description of PointNet model handling for more details.

– kpconvx_representation_dir: Path where the plots and CSV data representing the KPConvX kernels will be stored.

– compilation_args

– architecture_graph_path

– architecture_graph_args

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_class_distribution_plot_path

– training_classified_point_cloud_path

– training_activations_path

Hierarchical feature extraction with ContextNet

The ConvAutoencPwiseClassif architecture can be configured using ContextualPointLayer as the feature extraction strategy. This architecture considers three different levels of contextual information for each point: 1) The global features derived for all the input points (\(\pmb{G} \in \mathbb{R}^{R \times D_H}\)), 2) the local features derived from the topological information of each local neighborhood (\(\mathcal{H} \in \mathbb{R}^{R \times \kappa \times D_H}\)), and 3) the local features derived from topological and geometric information in the local neighborhood, i.e., considering the distances too (\(\mathcal{\widetilde{H}} \in \mathbb{R}^{R \times \kappa \times D_H}\)). Note that this architecture was developed in the context of the VirtuaLearn3D++ framework.

The JSON below illustrates how to configure ContextNet-based hierarchical feature extractors using the VL3D++ framework.

{
    "in_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/mined/Mar18_train_hsv_std.laz"
    ],
    "out_pcloud": [
        "/ext4/hei/Hessigheim_Benchmark/Epoch_March2018/vl3d/out/contextual_dumean_neck_head/T1/*"
    ],
    "sequential_pipeline": [
        {
            "train": "ConvolutionalAutoencoderPwiseClassifier",
            "training_type": "base",
            "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
            "random_seed": null,
            "model_args": {
                "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
                "num_classes": 11,
                "class_names": ["LowVeg", "ImpSurf", "Vehicle", "UrbanFurni", "Roof", "Facade", "Shrub", "Tree", "Soil/Gravel", "VertSurf", "Chimney"],
                "pre_processing": {
                    "pre_processor": "hierarchical_fpspp",
                    "support_strategy_num_points": 25000,
                    "to_unit_sphere": false,
                    "support_strategy": "fps",
                    "support_strategy_fast": 2,
                    "min_distance": 0.03,
                    "receptive_field_oversampling": {
                        "min_points": 2,
                        "strategy": "nearest",
                        "k": 3,
                        "radius": 0.5
                    },
                    "center_on_pcloud": true,
                    "training_class_distribution": [2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250, 2250],
                    "neighborhood": {
                        "type": "sphere",
                        "radius": 5.0,
                        "separation_factor": 0.8
                    },
                    "num_points_per_depth": [4096, 1024, 256, 64, 16],
                    "fast_flag_per_depth": [4, 4, false, false, false],
                    "num_downsampling_neighbors": [1, 16, 16, 16, 16],
                    "num_pwise_neighbors": [16, 16, 16, 16, 16],
                    "num_upsampling_neighbors": [1, 16, 16, 16, 16],
                    "nthreads": -1,
                    "training_receptive_fields_distribution_report_path": null,
                    "training_receptive_fields_distribution_plot_path": null,
                    "training_receptive_fields_dir": null,
                    "receptive_fields_distribution_report_path": null,
                    "receptive_fields_distribution_plot_path": null,
                    "receptive_fields_dir": null,
                    "training_support_points_report_path": null,
                    "support_points_report_path": null
                },
                "feature_extraction": {
                    "type": "Contextual",
                    "operations_per_depth": [2, 1, 1, 1, 1],
                    "feature_space_dims": [64, 64, 96, 128, 192, 256],
                    "hidden_channels": [128, 128, 192, 256, 384, 512],
                    "bn": [true, true, true, true, true, true],
                    "bn_momentum": [0.95, 0.95, 0.95, 0.95, 0.95, 0.95],
                    "bn_along_neighbors": [true, true, true, true, true, true],
                    "activation": ["relu", "relu", "relu", "relu", "relu", "relu"],
                    "distance": ["euclidean", "euclidean", "euclidean", "euclidean", "euclidean", "euclidean"],
                    "ascending_order": [true, true, true, true, true, true],
                    "aggregation": ["mean", "mean", "mean", "mean", "mean", "mean"],
                    "initializer": ["he_uniform", "he_uniform", "he_uniform", "he_uniform", "he_uniform", "he_uniform"],
                    "regularizer": [null, null, null, null, null, null],
                    "constraint": [null, null, null, null, null, null],
                    "activate": true
                },
                "features_alignment": null,
                "downsampling_filter": "mean",
                "upsampling_filter": "mean",
                "upsampling_bn": true,
                "upsampling_momentum": 0.95,
                "conv1d": true,
                "conv1d_kernel_initializer": "he_uniform",
                "neck":{
                    "max_depth": 2,
                    "hidden_channels": [64, 64],
                    "kernel_initializer": ["he_uniform", "he_uniform"],
                    "kernel_regularizer": [null, null],
                    "kernel_constraint": [null, null],
                    "bn_momentum": [0.95, 0.95],
                    "activation": ["relu", "relu"]
                },
                "contextual_head": {
                    "multihead": false,
                    "max_depth": 2,
                    "hidden_channels": [64, 64],
                    "output_channels": [64, 64],
                    "bn": [true, true],
                    "bn_momentum": [0.95, 0.95],
                    "bn_along_neighbors": [true, true],
                    "activation": ["relu", "relu"],
                    "distance": ["euclidean", "euclidean"],
                    "ascending_order": [true, true],
                    "aggregation": ["mean", "mean"],
                    "initializer": ["he_uniform", "he_uniform"],
                    "regularizer": [null, null],
                    "constraint": [null, null]
                },
                "output_kernel_initializer": "he_normal",
                "model_handling": {
                    "summary_report_path": "*/model_summary.log",
                    "training_history_dir": "*/training_eval/history",
                    "class_weight": [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                    "training_epochs": 200,
                    "batch_size": 16,
                    "training_sequencer": {
                        "type": "DLSequencer",
                        "random_shuffle_indices": true,
                        "augmentor": {
                            "transformations": [
                                    {
                                        "type": "Rotation",
                                        "axis": [0, 0, 1],
                                        "angle_distribution": {
                                            "type": "uniform",
                                            "start": -3.141592,
                                            "end": 3.141592
                                        }
                                    },
                                    {
                                        "type": "Scale",
                                        "scale_distribution": {
                                            "type": "uniform",
                                            "start": 0.985,
                                            "end": 1.015
                                        }
                                    },
                                    {
                                        "type": "Jitter",
                                        "noise_distribution": {
                                            "type": "normal",
                                            "mean": 0,
                                            "stdev": 0.0033
                                        }
                                    }
                            ]
                        }
                    },
                    "prediction_reducer": {
                        "reduce_strategy" : {
                            "type": "MeanPredReduceStrategy"
                        },
                        "select_strategy": {
                            "type": "ArgMaxPredSelectStrategy"
                        }
                    },
                    "checkpoint_path": "*/checkpoint.weights.h5",
                    "checkpoint_monitor": "loss",
                    "learning_rate_on_plateau": {
                        "monitor": "loss",
                        "mode": "min",
                        "factor": 0.1,
                        "patience": 2000,
                        "cooldown": 5,
                        "min_delta": 0.01,
                        "min_lr": 1e-6
                    }
                },
                "compilation_args": {
                    "optimizer": {
                        "algorithm": "AdamW",
                        "learning_rate": {
                            "schedule": "exponential_decay",
                            "schedule_args": {
                                "initial_learning_rate": 1e-2,
                                "decay_steps": 2500,
                                "decay_rate": 0.96,
                                "staircase": false
                            }
                        }
                    },
                    "loss": {
                        "function": "class_weighted_categorical_crossentropy"
                    },
                    "metrics": [
                        "categorical_accuracy",
                        "f1"
                    ]
                },
                "architecture_graph_path": "*/model_graph.png",
                "architecture_graph_args": {
                    "show_shapes": true,
                    "show_dtype": true,
                    "show_layer_names": true,
                    "rankdir": "TB",
                    "expand_nested": true,
                    "dpi": 300,
                    "show_layer_activations": true
                }
            },
            "autoval_metrics": null,
            "training_evaluation_metrics": null,
            "training_class_evaluation_metrics": null,
            "training_evaluation_report_path": null,
            "training_class_evaluation_report_path": null,
            "training_confusion_matrix_report_path": null,
            "training_confusion_matrix_plot_path": null,
            "training_class_distribution_report_path": null,
            "training_class_distribution_plot_path": null,
            "training_classified_point_cloud_path": null,
            "training_activations_path": null
        },
        {
            "writer": "PredictivePipelineWriter",
            "out_pipeline": "*/model/ContextNet.pipe",
            "include_writer": false,
            "include_imputer": true,
            "include_feature_transformer": true,
            "include_miner": true,
            "include_class_transformer": false,
            "include_clustering": false,
            "ignore_predictions": false
        }
    ]
}

The JSON above defines a ConvAutoencPwiseClassif that uses a hierarchical furthest point sampling strategy with a 3D spherical neighborhood to prepare the input for a ContextNet-based model. It uses ContextualPointLayer for feature extraction, a neck with depth 2, and a contextual head. The decoder uses Shared MLPs (as Conv1D blocks with unitary kernel). Both downsampling and upsampling compute the mean value for each local neighborhood.

Arguments

– training_type

Typically it should be "base" for neural networks. For further details read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

– feature_extraction

The definition of the feature extraction operator. A detailed description of the case when "type": "Contextual" is given below. For a description of the case when "type": "PointNet" see the PointNet operator documentation, for the case "type": "KPConv" see the KPConv operator documentation, to mimic a SFL-NET model see the SFL-NET documentation, for the case "type": "LightKPConv" see the LightKPConv operator documentation, to mimic a PointTransformer model see the PointTransformer documentation, to mimic a GroupedPointTransformer model see the GroupedPointTransformer documentation, and to mimic a KPConvX model see the KPConvX documentation.

– operations_per_depth: See KPConv arguments documentation.
– feature_space_dims: See KPConv arguments documentation.
– hidden_channels: A list with the dimensionality of the hidden feature space for each ContextualPointLayer in the encoding hierarchy.
– bn: See KPConv arguments documentation.
– bn_momentum: See KPConv arguments documentation.
– bn_along_neighbors: Whether to compute the batch normalization along the neighbors (true) or the features (false). Note that this applies for tensors such as \(\mathcal{H} \in \mathbb{R}^{R \times \kappa \times D_H}\) or \(\mathcal{\widehat{H}} \in \mathbb{R}^{R \times \kappa \times D_H}\) because they represent \(\kappa\) neighbors for each point.
– activation: A list with the activation function for each contextual point layer. See the keras documentation on activations for more details.
– distance: A list with the distance that must be used at each contextual point layer. Supported values are "euclidean" and "squared".
– ascending_order: Whether to force distance-based ascending order of the neighborhoods (true) or not (false).
– aggregation: A list with the aggregation strategy for each contextual point layer, either "max" or "mean".
– initializer: A list with the initializer for the matrices and vectors of weights. See Keras documentation on layer initializers for further details.
– regularizer: A list with the regularizer for the matrices and vectors of weights. See the keras documentation on regularizers for more details.
– constraint: A list with the constraint for the matrices and vectors of weights. See the keras documentation on constraints for more details.
– activate: See KPConv arguments documentation.

– features_alignment

– downsampling_filter

It can be configured to "strided_lightkpconv" (see StridedLightKPConvLayer) but it is also possible to use "strided_kpconv" to use the classical StridedKPConvLayer during downsampling. The FeaturesDownsamplingLayer and InterdimensionalPointTransformerLayer are also supported.

– upsampling_filter

See FeaturesUpsamplingLayer and InterdimensionalPointTransformerLayer for more details.

– upsampling_bn

– upsampling_momentum

– conv1d

Boolean flag governing whether to use unary convolutions (shared MLPs) to wrap the hourglass or not.

– conv1d_kernel_initializer

– neck

See the neck block documentation.

– contextual_head

The specification of the contextual head as specified in the contextual head documentation.

– output_kernel_initializer

– model_handling

Define how to handle the model, i.e., not the architecture itself but how it must be used. See the description of PointNet model handling for more details.

– compilation_args

– architecture_graph_path

– architecture_graph_args

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_class_distribution_plot_path

– training_classified_point_cloud_path

– training_activations_path

Sparse 3D convolutional point-wise classifier

The SpConv3DPwiseClassif architecture transforms the point cloud into a sparse hierarchical voxelization through the HierarchicalSGPreProcessorPP pre-processor (see the hierarchical sparse grid receptive field documentation). Typically, dense voxelizations representing 3D point clouds demand more memory than available due to the curse of dimensionality. This issue was discussed in the Submanifold Sparse Convolutional Networks and 3D Semantic Segmentation with Submanifold Sparse Convolutional Networks papers by Benjamin Graham et al. In the VirtuaLearn3D++ framework sparse convolutional neural networks are implemented through SpConv3DEncodingLayer and SpConv3DDecodingLayer, built on top of the primitive SubmanifoldSpConv3DLayer, DownsamplingSpConv3DLayer, and UpsamplingSpConv3DLayer. The C++ pre-processor emits, per receptive field per depth, the submanifold (S), downsampling (D), and upsampling (U) dense neighbor tables. The layer math is a single tf.gather + reshape + tf.matmul in active form, where each entry of S / D / U is one-based and the zero sentinel gathers the global ground row. The loss is a standard "sparse_categorical_crossentropy" with sample-weight masking (no ragged-loss wrapping is used in this new version, although it was in previous implementations). The DLSparseConcatSequencer pads every batch to a fixed shape, so the model’s tf.function is traced once, not per batch (see the Sparse sequencer documentation). The output head is pinned to dtype='float32' so the framework is safe under Keras 3 mixed-precision policies (mixed_float16 / mixed_bfloat16) and MaskedBatchNormalization computes moments in float32 regardless of the input dtype so the variance reduction does not overflow under 16-bit floating point formats.

The JSON below illustrates how to configure neural networks using 3D convolutions on sparse voxelizations of 3D point clouds. Fields whose value the user does not override fall back to the corresponding config/model.yml default.

{
    "train": "SparseConvolutional3DPwiseClassifier",
    "training_type": "base",
    "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
    "random_seed": null,
    "model_args": {
        "fnames": ["ones", "HSV_Hrad", "HSV_S", "HSV_V"],
        "num_classes": 11,
        "class_names": ["LowVeg", "ImpSurf", "Vehicle", "UrbanFurni", "Roof", "Facade", "Shrub", "Tree", "Soil/Gravel", "VertSurf", "Chimney"],
        "pre_processing": {
            "pre_processor": "hierarchical_sg",
            "support_strategy_num_points": 4096,
            "support_strategy": "fps",
            "support_strategy_fast": 4,
            "center_on_pcloud": true,
            "training_class_distribution": [500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500],
            "neighborhood": {
                "type": "sphere",
                "radius": 16.0,
                "separation_factor": 0.8
            },
            "cell_size": 0.25,
            "submanifold_window": [2, 1, 1, 1],
            "downsampling_window": [2, 2, 2],
            "downsampling_stride": [2, 2, 2],
            "upsampling_window": [2, 2, 2],
            "upsampling_stride": [2, 2, 2],
            "feature_reduce_strategy": "mean",
            "nthreads": -1,
            "training_receptive_fields_distribution_report_path": "*/training_eval/training_receptive_fields_distribution.log",
            "training_receptive_fields_distribution_plot_path": "*/training_eval/training_receptive_fields_distribution.svg",
            "training_receptive_fields_dir": null,
            "receptive_fields_distribution_report_path": null,
            "receptive_fields_distribution_plot_path": null,
            "receptive_fields_dir": null,
            "training_support_points_report_path": "*/training_eval/training_support_points.las",
            "support_points_report_path": null
        },
        "layer_by_layer": false,
        "initial_shared_mlp": true,
        "initial_shared_mlp_initializer": "glorot_normal",
        "initial_shared_mlp_regularizer": null,
        "initial_shared_mlp_constraint": null,
        "initial_shared_mlp_activation": "relu",
        "initial_unactivated_spconv": false,
        "spconvs_per_encoder": 1,
        "submanifold_features": [64, 128, 256, 512],
        "submanifold_initializer": ["glorot_normal", "glorot_normal", "glorot_normal", "glorot_normal"],
        "submanifold_regularizer": [null, null, null, null],
        "submanifold_constraint": [null, null, null, null],
        "submanifold_bn_momentum": [0.9, 0.9, 0.9, 0.9],
        "downsampling_initializer": ["glorot_normal", "glorot_normal", "glorot_normal"],
        "downsampling_regularizer": [null, null, null],
        "downsampling_constraint": [null, null, null],
        "downsampling_bn_momentum": [0.9, 0.9, 0.9],
        "upsampling_initializer": ["glorot_normal", "glorot_normal", "glorot_normal"],
        "upsampling_regularizer": [null, null, null],
        "upsampling_constraint": [null, null, null],
        "upsampling_bn_momentum": [0.9, 0.9, 0.9],
        "upsampling_shared_mlp_initializer": ["glorot_normal", "glorot_normal", "glorot_normal"],
        "upsampling_shared_mlp_regularizer": [null, null, null],
        "upsampling_shared_mlp_constraint": [null, null, null],
        "upsampling_shared_mlp_activation": ["relu", "relu", "relu"],
        "upsampling_shared_mlp_bn_momentum": [0.9, 0.9, 0.9],
        "feature_dim_divisor": 2,
        "dim_transform_kernel_initializer": "glorot_normal",
        "dim_transform_kernel_regularizer": null,
        "dim_transform_kernel_constraint": null,
        "dim_transform_activation": "relu",
        "dim_transform_bn_momentum": 0.9,
        "residual_strategy": "sharedmlp",
        "post_residual_shared_mlp": false,
        "residual_shared_mlp_kernel_initializer": "glorot_normal",
        "residual_shared_mlp_kernel_regularizer": null,
        "residual_shared_mlp_kernel_constraint": null,
        "residual_shared_mlp_activation": "relu",
        "output_kernel_initializer": "glorot_normal",
        "output_kernel_regularizer": null,
        "output_kernel_constraint": null,
        "model_handling": {
            "summary_report_path": "*/model_summary.log",
            "training_history_dir": "*/training_eval/history",
            "features_structuring_representation_dir": null,
            "class_weight": null,
            "training_epochs": 200,
            "batch_size": 4,
            "training_sequencer": {
                "type": "DLSparseConcatSequencer",
                "random_shuffle_indices": true,
                "ignore_labels": null
            },
            "prediction_reducer": {
                "reduce_strategy" : {
                    "type": "MeanPredReduceStrategy"
                },
                "select_strategy": {
                    "type": "ArgMaxPredSelectStrategy",
                    "disabled_classes": null
                }
            },
            "checkpoint_path": "*/checkpoint.weights.h5",
            "checkpoint_monitor": "loss",
            "learning_rate_on_plateau": {
                "monitor": "loss",
                "mode": "min",
                "factor": 0.1,
                "patience": 2000,
                "cooldown": 5,
                "min_delta": 0.01,
                "min_lr": 1e-6
            }
        },
        "compilation_args": {
            "optimizer": {
                "algorithm": "Adam",
                "learning_rate": {
                    "schedule": "exponential_decay",
                    "schedule_args": {
                        "initial_learning_rate": 1e-2,
                        "decay_steps": 2500,
                        "decay_rate": 0.96,
                        "staircase": false
                    }
                }
            },
            "loss": {
                "function": "sparse_categorical_crossentropy"
            },
            "metrics": [
                "sparse_categorical_accuracy"
            ]
        },
        "architecture_graph_path": "*/model_graph.png",
        "architecture_graph_args": {
            "show_shapes": true,
            "show_dtype": true,
            "show_layer_names": true,
            "rankdir": "TB",
            "expand_nested": true,
            "dpi": 300,
            "show_layer_activations": true
        }
    },
    "autoval_metrics": null,
    "training_evaluation_metrics": null,
    "training_class_evaluation_metrics": null,
    "training_evaluation_report_path": null,
    "training_class_evaluation_report_path": null,
    "training_confusion_matrix_report_path": null,
    "training_confusion_matrix_plot_path": null,
    "training_class_distribution_report_path": null,
    "training_class_distribution_plot_path": null,
    "training_classified_point_cloud_path": null,
    "training_activations_path": null
}

The JSON above defines a SpConv3DPwiseClassif that uses a hierarchical sparse 3D grid to represent spherical neighborhoods with radius of \(16\) meters in a 3D point cloud. It has a max depth of four with \(64\) output features in the first level and \(512\) in the lowest one. Channel widths are controlled by submanifold_features.

The model uses the DLSparseConcatSequencer, which concatenates per-batch-element receptive fields into a single global feature tensor with offset-adjusted dense neighbor tables. The compiled neural network performs the convolutions in a single tf.gather + reshape + tf.matmul pass operating on every active cell of every receptive field in the batch simultaneously, so MaskedBatchNormalization sees one global (active_cells, channels) matrix per BN sublayer. Padded cells are masked out of the batch statistics so the running mean and variance are not biased by the padding ratio.

Arguments

– training_type

Typically it should be "base" for neural networks. For further details, read the training strategies section.

– fnames

– random_seed

– model_args

The model specification.

– fnames

– num_classes

– class_names

– pre_processing

The hierarchical sparse 3D convolutional model demands a hierarchical sparse grid as its receptive-field strategy. See the hierarchical SG documentation for the full list of pre-processor kwargs (pre_processor, support_strategy, neighborhood, cell_size, submanifold_window / downsampling_window / upsampling_window / strides, feature_reduce_strategy, the receptive-field report / dir knobs, etc.). The submanifold_window / downsampling_window / upsampling_window lists determine the hierarchy depth and the per-level kernel-position counts. They must be consistent with the per-level lengths of the model_args lists (submanifold_features, downsampling_initializer, upsampling_initializer, …).

– layer_by_layer

Must be false. The only supported build path is the fused encoder / decoder one (SpConv3DEncodingLayer + SpConv3DDecodingLayer). Setting this to true raises a DeepLearningException. The field is kept so older pipeline JSONs still parse, but its value has no effect beyond the loud-failure guard.

– initial_shared_mlp

Whether to apply a shared MLP to the input data to transform it before computing the sparse convolutional hierarchy (true) or not (false).

– initial_shared_mlp_initializer

The initialization method for the initial SharedMLP. See the keras documentation on initializers for more details.

– initial_shared_mlp_regularizer

The regularization strategy for the weights of the initial SharedMLP. See the keras documentation on regularizers for more details.

– initial_shared_mlp_constraint

The constraints of the weights of the initial SharedMLP. See the keras documentation on constraints for more details.

– initial_shared_mlp_activation

The activation function for the initial SharedMLP. See the keras documentation on activations for more details.

– initial_unactivated_spconv

Whether to apply a sparse convolution before the activation of the input (true) or not (false).

– spconvs_per_encoder

Integer governing how many sparse convolutions are computed in each encoding block.

– submanifold_features

List of integers governing how many output features are generated through sparse submanifold convolutions at each level of the hierarchy. This is the channel-width knob. The number of kernel positions at each level is derived from the convolutional window sizes (submanifold_window / downsampling_window / upsampling_window in the pre-processor) and is not configurable directly: \((2 w_t + 1)^3\) for submanifold, \((w^D_t)^3\) for downsampling, \((w^U_t)^3\) for upsampling.

– submanifold_initializer

List with the initializer for the weights of each sparse submanifold convolution in the hierarchy. See the keras documentation on initializers for more details.

– submanifold_regularizer

List with the regularizer for the weights of each sparse submanifold convolution in the hierarchy. See the keras documentation on regularizers for more details.

– submanifold_constraint

List with the constraints for the weights of each sparse submanifold convolution in the hierarchy. See the keras documentation on constraints for more details.

– submanifold_bn_momentum

List with the momentum for the moving average of the batch normalization for each sparse submanifold convolution.

– downsampling_initializer

List with the initializer for the weights of each sparse downsampling convolution in the hierarchy. See the keras documentation on initializers for more details.

– downsampling_regularizer

List with the regularizer for the weights of each sparse downsampling convolution in the hierarchy. See the keras documentation on regularizers for more details.

– downsampling_constraint

List with the constraints for the weights of each sparse downsampling convolution in the hierarchy. See the keras documentation on constraints for more details.

– downsampling_bn_momentum

List with the momentum for the moving average of the batch normalization for each sparse downsampling convolution.

– upsampling_initializer

List with the initializer for the weights of each sparse upsampling convolution in the hierarchy. See the keras documentation on initializers for more details.

– upsampling_regularizer

List with the regularizer for the weights of each sparse upsampling convolution in the hierarchy. See the keras documentation on regularizers for more details.

– upsampling_constraint

List with the constraints for the weights of each sparse upsampling convolution in the hierarchy. See the keras documentation on constraints for more details.

– upsampling_bn_momentum

List with the momentum for the moving average of the batch normalization for each sparse upsampling convolution.

– upsampling_shared_mlp_initializer

List with the initializer for the SharedMLP of each upsampling block in the hierarchy. See the keras documentation on initializers for more details.

– upsampling_shared_mlp_regularizer

List with the regularizer for the SharedMLP of each upsampling block in the hierarchy. See the keras documentation on regularizers for more details.

– upsampling_shared_mlp_constraint

List with the constraints for the SharedMLP of each upsampling block in the hierarchy. See the keras documentation on constraints for more details.

– upsampling_shared_mlp_activation

List with the activation function for the SharedMLP of each upsampling block in the hierarchy. See the keras documentation on activations for more details.

– upsampling_shared_mlp_bn_momentum

List with the momentum for the moving average of the batch normalization for the SharedMLP of each upsampling block.

– feature_dim_divisor

The divisor for the dimensionality of the feature space governing how the wrappers transform the dimensionality before the convolutions. Typically the feature dim divisor reduces the dimensionality (often to half) at the pre-wrapper before the convolutions, and then it is restored by the post-wrapper after the convolutions.

– dim_transform_kernel_initializer

The initializer for the wrapper dimensionality transformation. See the keras documentation on initializers for more details.

– dim_transform_kernel_regularizer

The regularizer for the wrapper dimensionality transformation. See the keras documentation on regularizers for more details.

– dim_transform_kernel_constraint

The constraints for the wrapper dimensionality transformation. See the keras documentation on constraints for more details.

– dim_transform_activation

The activation function for the wrapper dimensionality transformation. See the keras documentation on activations for more details.

– dim_transform_bn_momentum

The momentum for the moving average of the batch normalization for the wrapper dimensionality transformation.

– residual_strategy

The type of layer to be used in the residual blocks at each level of the hierarchy. Supported values:

"sharedmlp" (or its alias "conv1d") — use a Shared MLP in the residual branch.
"ssc3d" — use a submanifold sparse convolution.
null (or the string "null") — disable the residual branch entirely.

– post_residual_shared_mlp

Whether to apply a SharedMLP after the residual block (true) or not (false).

– residual_shared_mlp_kernel_initializer

The initializer for the residual SharedMLP kernel (and for the post-residual SharedMLP kernel when post_residual_shared_mlp=true). See the keras documentation on initializers for more details.

– residual_shared_mlp_kernel_regularizer

The regularizer for the residual SharedMLP kernel (reused for the post-residual MLP kernel when enabled). See the keras documentation on regularizers for more details.

– residual_shared_mlp_kernel_constraint

The constraints for the residual SharedMLP kernel (reused for the post-residual MLP kernel when enabled). See the keras documentation on constraints for more details.

– residual_shared_mlp_activation

The activation function for the residual SharedMLP (and for the post-residual SharedMLP when post_residual_shared_mlp=true). See the keras documentation on activations for more details.

– output_kernel_initializer

The initializer for the final output Dense layer (softmax / sigmoid head). See the keras documentation on initializers for more details.

– output_kernel_regularizer

The regularizer for the final output Dense layer. See the keras documentation on regularizers for more details.

– output_kernel_constraint

The constraint for the final output Dense layer. See the keras documentation on constraints for more details.

– model_handling

The model handling specification is the same as the PointNet model handling specification with one constraint: training_sequencer.type must be "DLSparseConcatSequencer" (the only sequencer compatible with the static-shape padding contract; see the Sparse sequencer documentation for the full pad / mask layout).

The DLSparseConcatSequencer accepts two extra knobs on top of the base sequencer contract:

– random_shuffle_indices: When true, the order of the receptive fields is shuffled at the end of every training epoch so that the same RFs do not always land in the same batch.
– ignore_labels: Optional list of integer label values that should be excluded from the training loss / metric. Cells whose label is in the list get sample_weight = 0.0 in the emitted training tuple, so they never contribute to the gradient. Useful for “unclassified” placeholders or for domain-irrelevant classes the user wants to keep in the input (so the network still gets to see their geometry as neighbors) but not in the loss. Defaults to null (no masking). Every value listed here must be representable in the dtype of the bound label array: the sequencer checks this at set_input_data time across all per-RF label arrays and raises a DeepLearningException on a narrowing cast (e.g., 255 in an int8 label array would silently wrap to -1 without the check).
– disabled_classes: Unlike the two sequencer arguments above, this one belongs to the select_strategy of the prediction_reducer (ArgMaxPredSelectStrategy). Optional list of integer class indices whose columns are masked to \(-\infty\) before the argmax. Useful for blocking the model from ever predicting a “sink” class at inference (e.g., the “unclassified” sink that low-signal cells fall into) without having to retrain. Out-of-range and negative entries are silently ignored. Defaults to null (no masking).

– compilation_args

See the PointNet compilation args documentation.

– architecture_graph_path

– architecture_graph_args

– training_evaluation_metrics

– training_class_evaluation_metrics

– training_evaluation_report_path

– training_class_evaluation_report_path

– training_confusion_matrix_report_path

– training_confusion_matrix_plot_path

– training_class_distribution_report_path

– training_class_distribution_plot_path

– training_classified_point_cloud_path

– training_activations_path