CNum 0.2.1
CPU-optimized ML library for C++
Loading...
Searching...
No Matches
Data.h
Go to the documentation of this file.
1#ifndef DATA_H
2#define DATA_H
3
5
6#include <string>
7#include <memory>
8#include <fstream>
9#include <sstream>
10#include <array>
11
16namespace CNum::Data {
/**
 * @brief A bin for quantile and uniform binning.
 *
 * Default construction zero-initializes both fields so that a freshly
 * allocated array of bins never exposes an indeterminate value.
 */
struct Bin {
  uint32_t num;  ///< NOTE(review): presumably the bin's identifier/index — confirm against the binning code.
  size_t ct;     ///< NOTE(review): presumably the number of samples counted in this bin — confirm.

  /// @brief Construct an empty bin with `num == 0` and `ct == 0`.
  Bin() : num(0), ct(0) {}
};
26
31 struct Shelf {
32 size_t num_bins;
33 std::unique_ptr<Bin[]> bins;
34 std::unique_ptr<double[]> ranges;
35
36 Shelf() : num_bins(0) {}
37 Shelf(size_t nb)
38 : num_bins(nb),
39 bins(std::make_unique<Bin[]>(nb)),
40 ranges(std::make_unique<double[]>(nb - 1)) {}
41
42 Shelf &operator=(Shelf &&other) {
43 if (this == &other)
44 return *this;
45
46 num_bins = other.num_bins;
47
48 bins.reset();
49 ranges.reset();
50
51 bins = std::move(other.bins);
52 ranges = std::move(other.ranges);
53
54 return *this;
55 }
56 };
57
/// @brief Get data from a delimiter-separated (_SV) file whose last column holds the labels.
/// @param data_path Path of the file to load.
/// @param seperator Character that separates fields on a line; defaults to ','.
/// @return Two matrices of doubles. NOTE(review): presumably the features and the
///         labels — the order is not visible from this declaration; confirm
///         against the implementation.
62 std::array< CNum::DataStructs::Matrix<double>, 2 > get_data(std::string data_path, char seperator = ',');
63
/// @brief Principal component analysis.
/// @param input_path  NOTE(review): presumably the path of the dataset to analyse — confirm.
/// @param output_path NOTE(review): presumably the path the result is written to — confirm.
67 void PCA(std::string input_path, std::string output_path);
68
/// @brief Uniform binning of data.
/// @param data     Matrix of values to bin.
/// @param num_bins Number of bins to use; defaults to 256.
/// @return Shared array of Shelf objects. NOTE(review): presumably one Shelf per
///         feature of `data` — the array length is not visible here; confirm.
73 std::shared_ptr<Shelf[]> uniform_bin(const CNum::DataStructs::Matrix<double> &data, size_t num_bins = 256);
74
/// @brief Quantile binning of data using a quantile sketch, not exact quantile bins.
/// @param data     Matrix of values to bin.
/// @param num_bins Number of bins to use; defaults to 256.
/// @return Shared array of Shelf objects. NOTE(review): presumably one Shelf per
///         feature of `data` — the array length is not visible here; confirm.
82 std::shared_ptr<Shelf[]> quantile_bin(const CNum::DataStructs::Matrix<double> &data, size_t num_bins = 256);
83
89};
90
91#endif
2d array abstraction
Definition Matrix.h:43
Tools used for gathering and grouping datasets.
void PCA(std::string input_path, std::string output_path)
Principal component analysis.
CNum::DataStructs::Matrix< int > apply_quantile(const CNum::DataStructs::Matrix< double > &data, std::shared_ptr< Shelf[]> shelves)
Construct data matrix of bin values.
std::shared_ptr< Shelf[]> quantile_bin(const CNum::DataStructs::Matrix< double > &data, size_t num_bins=256)
Quantile sketch, not exact quantile bins.
std::array< CNum::DataStructs::Matrix< double >, 2 > get_data(std::string data_path, char seperator=',')
Get data from a delimiter-separated (_SV) file whose last column holds the labels.
std::shared_ptr< Shelf[]> uniform_bin(const CNum::DataStructs::Matrix< double > &data, size_t num_bins=256)
Uniform binning of data.
A bin for quantile and uniform binning.
Definition Data.h:21
Bin()
Definition Data.h:24
uint32_t num
Definition Data.h:22
size_t ct
Definition Data.h:23
Contains bins and the ranges of values they represent.
Definition Data.h:31
Shelf & operator=(Shelf &&other)
Definition Data.h:42
std::unique_ptr< Bin[]> bins
Definition Data.h:33
Shelf(size_t nb)
Definition Data.h:37
size_t num_bins
Definition Data.h:32
std::unique_ptr< double[]> ranges
Definition Data.h:34
Shelf()
Definition Data.h:36