CNum 0.2.1
CPU-optimized ML library for C++
Loading...
Searching...
No Matches
GBModel.h
Go to the documentation of this file.
1#ifndef GB_REGRESSOR_H
2#define GB_REGRESSOR_H
3
4#include "CNum/Utils/Utils.h"
5#include "CNum/Data/Data.h"
6#include "CNum/Model/Model.h"
8#include "json.hpp"
9#include <variant>
10
12 struct Split;
13 using json = ::nlohmann::json;
14 using SubsampleFunction = ::std::function< void(size_t *, size_t, size_t, size_t, ::CNum::DataStructs::Matrix<double>) >;
15
16 inline SubsampleFunction default_subsample = [] (size_t *pos_ptr,
17 size_t low,
18 size_t high,
19 size_t n_samples,
20 const ::CNum::DataStructs::Matrix<double> y) -> void {
21 if (low == 0 && high == n_samples) {
22 ::std::iota(pos_ptr, pos_ptr + n_samples, low);
23 } else {
24 ::CNum::Utils::Rand::generate_n_unique_rand_in_range<size_t>(low, high - 1, pos_ptr, n_samples, 1);
25 }
26 };
27
34 enum SplitAlg {
37 };
38
39 struct TreeBoosterNode;
40
51 template <typename TreeType>
52 class GBModel {
53 private:
54 TreeType *_trees;
55 ::std::string _loss_type;
58 double _learning_rate;
59 size_t _n_learners;
60 double _subsample;
61 int _max_depth;
62 int _min_samples;
63 double _weight_decay;
64 ::std::string _activation;
65 double _reg_lambda;
66 double _gamma;
67 SplitAlg _sa;
68 SubsampleFunction _subsample_function;
69
74 static TreeBoosterNode *parse_learner(json node);
75
77 void copy_hyperparams(const GBModel &other) noexcept;
78
80 void copy(const GBModel &other) noexcept;
81
83 void move(GBModel &&other) noexcept;
84
85 public:
107 * in the resultant data partitions are both greater than weight_decay.
108 *
109 * See TreeBoosterNode to understand how the regularization parameters are used
110 */
111 GBModel(std::string lt = "MSE",
112 int n_learners = 200,
113 double lr = 0.1,
114 double ss = 0.25,
115 int md = 5,
116 int ms = 3,
117 SplitAlg sa = HIST,
118 ::std::string activation_func = "",
119 double weight_decay = 0.0,
120 double rl = 1.0,
121 double gamma = 0.0,
127
129 * @param ss The amount of the dataset to sample for each tree
130 * @param md The max depth of each tree
131 * @param ms The minimum number of samples required to continue building a tree
132 * @param sa Split Algorithm; The method used to find the best splits at each
133 * node (GREEDY or HIST)
134 * @param activation_func The activation function to apply after making predictions
135 * @param weight_decay A threshold used to determine whether a split is worth
136 * considering
137 * @param rl Reg Lambda; A regularization parameter used in gain calculations
138 * @param gamma A regularization parameter used in gain calculations
139 * @param ssf The function used to take subsamples of the data that a TreeBooster
140 * is trained on
141 *
142 *
143 * In the tree building process a split is only taken if the sum of the hessians
144 * in the resultant data partitions are both greater than weight_decay.
145 *
146 * See TreeBoosterNode to understand how the regularization parameters are used
147 */
149 int n_learners,
150 double lr,
151 double ss,
152 int md,
153 int ms,
154 SplitAlg sa,
156 double weight_decay,
157 double rl,
158 double gamma,
160
161
163 GBModel(const GBModel &other) noexcept;
164
166 GBModel<TreeType> &operator=(const GBModel &other) noexcept;
167
169 GBModel(GBModel &&other) noexcept;
170
172 GBModel<TreeType> &operator=(GBModel &&other) noexcept;
173
175 ~GBModel();
176
182 bool verbose = true);
183
188
191 void save_model(std::string path);
192
196 static GBModel<TreeType> load_model(std::string path);
197 };
198
199#include "GBModel.tpp"
200};
201
202#endif
2d array abstraction
Definition Matrix.h:43
GBModel< TreeType > & operator=(const GBModel &other) noexcept
Copy assignment.
Definition GBModel.h:112
void fit(::CNum::DataStructs::Matrix< double > &X, ::CNum::DataStructs::Matrix< double > &y, bool verbose=true)
Train the model.
Definition GBModel.h:138
static GBModel< TreeType > load_model(std::string path)
Load Model from JSON encoded ".cmod" file.
Definition GBModel.h:270
GBModel(std::string lt="MSE", int n_learners=200, double lr=0.1, double ss=0.25, int md=5, int ms=3, SplitAlg sa=HIST, ::std::string activation_func="", double weight_decay=0.0, double rl=1.0, double gamma=0.0, SubsampleFunction ssf=default_subsample)
Overloaded default constructor.
~GBModel()
Destructor.
Definition GBModel.h:129
void save_model(std::string path)
Save Model to JSON encoded ".cmod" file.
Definition GBModel.h:222
GBModel(::CNum::Model::Loss::LossProfile loss_profile, int n_learners, double lr, double ss, int md, int ms, SplitAlg sa, ::CNum::Model::Activation::ActivationFunc activation_func, double weight_decay, double rl, double gamma, SubsampleFunction ssf=default_subsample)
Parameterized constructor.
::CNum::DataStructs::Matrix< double > predict(::CNum::DataStructs::Matrix< double > &data)
Inference (making predictions).
Definition GBModel.h:202
A node used in a TreeBooster used for gather and storing information about the decision making proces...
Definition TreeBoosterNode.h:25
std::function< double(double) > ActivationFunc
Definition Activation.h:16
Tree-based models.
Definition GBModel.h:11
::nlohmann::json json
Definition GBModel.h:13
::std::function< void(size_t *, size_t, size_t, size_t, ::CNum::DataStructs::Matrix< double >) > SubsampleFunction
Definition GBModel.h:14
SplitAlg
The algorithm used for finding splits during tree building.
Definition GBModel.h:34
@ HIST
Definition GBModel.h:36
@ GREEDY
Definition GBModel.h:35
SubsampleFunction default_subsample
Definition GBModel.h:16
void generate_n_unique_rand_in_range(size_t low_bound, size_t high_bound, T *out, size_t n, uint64_t logical_id=0)
Generate n unique random integers.
Definition RandUtils.h:3
The loss, gradient, and hessian functions associated with a loss function.
Definition Loss.h:22
Holds data associated with the decision making process in a TreeBoosterNode.
Definition TreeDefs.h:50