QuickRank  v2.0
QuickRank: A C++ suite of Learning to Rank algorithms
rtnode.h
Go to the documentation of this file.
1 /*
2  * QuickRank - A C++ suite of Learning to Rank algorithms
3  * Webpage: http://quickrank.isti.cnr.it/
4  * Contact: quickrank@isti.cnr.it
5  *
6  * Unless explicitly acquired and licensed from Licensor under another
7  * license, the contents of this file are subject to the Reciprocal Public
8  * License ("RPL") Version 1.5, or subsequent versions as allowed by the RPL,
9  * and You may not copy or use this file in either source code or executable
10  * form, except in compliance with the terms and conditions of the RPL.
11  *
12  * All software distributed under the RPL is provided strictly on an "AS
13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, AND
14  * LICENSOR HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
15  * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
16  * PURPOSE, QUIET ENJOYMENT, OR NON-INFRINGEMENT. See the RPL for specific
17  * language governing rights and limitations under the RPL.
18  *
19  * Contributor:
20  * HPC. Laboratory - ISTI - CNR - http://hpc.isti.cnr.it/
21  */
22 #pragma once
23 
#include <cstddef>
#include <cstdint>
#include <string>

// NOTE(review): this include (listing line 26) was dropped from the rendered
// listing; RTNodeHistogram is defined in rtnode_histogram.h, but the directory
// prefix used here is presumed -- confirm against the build's include paths.
#include "learning/tree/rtnode_histogram.h"
#include "types.h"
#include "pugixml/src/pugixml.hpp"

#ifdef QUICKRANK_PERF_STATS
#include <atomic>
#endif
33 
// Sentinel meaning "no feature assigned": the largest value representable in
// size_t (despite the name, this is not UINT_MAX).
static constexpr size_t uint_max = static_cast<size_t>(-1);
35 
36 class RTNode {
37 
38  public:
39  size_t *sampleids = NULL;
40  size_t nsampleids = 0;
41  float threshold = 0.0f;
42  double deviance = 0.0;
43  double avglabel = 0.0;
44  RTNode *left = NULL;
45  RTNode *right = NULL;
47 
48  private:
49  size_t featureidx = uint_max; //refer the index in the feature matrix
50  size_t featureid = uint_max; //refer to the id occurring in the dataset file
51 
52 #ifdef QUICKRANK_PERF_STATS
53  // number of internal nodes traversed
54  static std::atomic<std::uint_fast64_t> _internal_nodes_traversed;
55 #endif
56 
57  public:
58  // new leaf
59  RTNode(double prediction) {
60  avglabel = prediction;
61  /*
62  featureidx = uint_max;
63  featureid = uint_max;
64  sampleids = NULL;
65  nsampleids = 0;
66  deviance = -1;
67  hist = NULL;
68  left = NULL;
69  right = NULL;
70  */
71  }
72 
73  RTNode(size_t *new_sampleids, size_t new_nsampleids,
74  double prediction) {
75  sampleids = new_sampleids;
76  nsampleids = new_nsampleids;
77  avglabel = prediction;
78  }
79 
80  // new node
81  RTNode(float new_threshold, size_t new_featureidx,
82  size_t new_featureid, RTNode *new_left, RTNode *new_right) {
83  threshold = new_threshold;
84  featureidx = new_featureidx;
85  featureid = new_featureid;
86  left = new_left;
87  right = new_right;
88  /*
89  sampleids = NULL;
90  nsampleids = 0;
91  deviance = -1;
92  hist = NULL;
93  avglabel = 0.0;
94  */
95  }
96 
97  RTNode(size_t *new_sampleids, RTNodeHistogram *new_hist) {
98  hist = new_hist;
99  sampleids = new_sampleids;
100  nsampleids = hist->count[0][hist->thresholds_size[0] - 1];
101  double sumlabel = hist->sumlbl[0][hist->thresholds_size[0] - 1];
102  avglabel = nsampleids ? sumlabel / (double) nsampleids : 0.0;
103  deviance = hist->squares_sum_
104  - hist->sumlbl[0][hist->thresholds_size[0] - 1]
105  * hist->sumlbl[0][hist->thresholds_size[0] - 1]
106  / (double) hist->count[0][hist->thresholds_size[0] - 1];
107  }
108 
110  if (left)
111  delete left;
112  if (right)
113  delete right;
114  }
115  void set_feature(size_t fidx, size_t fid) {
116  //if(fidx==uint_max or fid==uint_max) exit(7);
117  featureidx = fidx, featureid = fid;
118  }
119  size_t get_feature_id() {
120  return featureid;
121  }
122  size_t get_feature_idx() {
123  return featureidx;
124  }
125 
126  void save_leaves(RTNode **&leaves, size_t &nleaves,
127  size_t &capacity);
128 
129  bool is_leaf() const {
130  return featureidx == uint_max;
131  }
132 
134  const size_t next_fx_offset) const {
135  /*if (featureidx == uint_max)
136  std::cout << avglabel << std::endl;
137  else
138  std::cout << d[featureidx * offset] << "<=" << threshold << std::endl;
139  */
140  quickrank::Score score =
141  featureidx == uint_max ?
142  avglabel :
143  (d[featureidx * next_fx_offset] <= threshold ?
144  left->score_instance(d, next_fx_offset) :
145  right->score_instance(d, next_fx_offset));
146 #ifdef QUICKRANK_PERF_STATS
147  if (featureidx != uint_max)
148  _internal_nodes_traversed.fetch_add(1, std::memory_order_relaxed);
149 #endif
150  return score;
151  }
152 
153 #ifdef QUICKRANK_PERF_STATS
154  static void clean_stats() {
155  _internal_nodes_traversed = 0;
156  }
157 
158  static unsigned long long internal_nodes_traversed() {
159  return _internal_nodes_traversed;
160  }
161 #endif
162 
163  pugi::xml_node append_xml_model(pugi::xml_node parent,
164  const std::string &pos = "") const;
165 
166  static RTNode *parse_xml(const pugi::xml_node &split_xml);
167 };
RTNodeHistogram * hist
Definition: rtnode.h:46
RTNode(size_t *new_sampleids, RTNodeHistogram *new_hist)
Definition: rtnode.h:97
size_t const * thresholds_size
Definition: rtnode_histogram.h:29
size_t * sampleids
Definition: rtnode.h:39
bool is_leaf() const
Definition: rtnode.h:129
double squares_sum_
Definition: rtnode_histogram.h:34
size_t get_feature_idx()
Definition: rtnode.h:122
RTNode(double prediction)
Definition: rtnode.h:59
static RTNode * parse_xml(const pugi::xml_node &split_xml)
Definition: rtnode.cc:78
double ** sumlbl
Definition: rtnode_histogram.h:32
RTNode * left
Definition: rtnode.h:44
RTNode(float new_threshold, size_t new_featureidx, size_t new_featureid, RTNode *new_left, RTNode *new_right)
Definition: rtnode.h:81
float Feature
data type for an instance feature value
Definition: types.h:31
RTNode(size_t *new_sampleids, size_t new_nsampleids, double prediction)
Definition: rtnode.h:73
pugi::xml_node append_xml_model(pugi::xml_node parent, const std::string &pos="") const
Definition: rtnode.cc:48
quickrank::Score score_instance(const quickrank::Feature *d, const size_t next_fx_offset) const
Definition: rtnode.h:133
size_t featureid
Definition: rtnode.h:50
double avglabel
Definition: rtnode.h:43
double Score
data type for instance predicted label
Definition: types.h:30
void save_leaves(RTNode **&leaves, size_t &nleaves, size_t &capacity)
Definition: rtnode.cc:34
size_t nsampleids
Definition: rtnode.h:40
double deviance
Definition: rtnode.h:42
RTNodeHistogram
Definition: rtnode_histogram.h:26
~RTNode()
Definition: rtnode.h:109
size_t get_feature_id()
Definition: rtnode.h:119
RTNode
Definition: rtnode.h:36
float threshold
Definition: rtnode.h:41
void set_feature(size_t fidx, size_t fid)
Definition: rtnode.h:115
size_t ** count
Definition: rtnode_histogram.h:33
RTNode * right
Definition: rtnode.h:45
static const size_t uint_max
Definition: rtnode.h:34
size_t featureidx
Definition: rtnode.h:49