QuickRank  v2.0
QuickRank: A C++ suite of Learning to Rank algorithms
rankboost.h
Go to the documentation of this file.
1 /*
2  * QuickRank - A C++ suite of Learning to Rank algorithms
3  * Webpage: http://quickrank.isti.cnr.it/
4  * Contact: quickrank@isti.cnr.it
5  *
6  * Unless explicitly acquired and licensed from Licensor under another
7  * license, the contents of this file are subject to the Reciprocal Public
8  * License ("RPL") Version 1.5, or subsequent versions as allowed by the RPL,
9  * and You may not copy or use this file in either source code or executable
10  * form, except in compliance with the terms and conditions of the RPL.
11  *
12  * All software distributed under the RPL is provided strictly on an "AS
13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, AND
14  * LICENSOR HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
15  * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
16  * PURPOSE, QUIET ENJOYMENT, OR NON-INFRINGEMENT. See the RPL for specific
17  * language governing rights and limitations under the RPL.
18  *
19  * Contributors:
20  * HPC. Laboratory - ISTI - CNR - http://hpc.isti.cnr.it/
21  * Tommaso Papini - https://github.com/oddlord
22  * Gabriele Bani - https://github.com/brnibani
23  */
24 #pragma once
25 
26 #include <memory>
27 
28 #include "data/dataset.h"
29 #include "metric/ir/metric.h"
30 #include "learning/ltr_algorithm.h"
31 
32 namespace quickrank {
33 namespace learning {
34 namespace forests {
35 
36 class WeakRanker {
37  public:
38 
39  WeakRanker(unsigned int feature_id, Feature theta, int sign) {
40  feature_id_ = feature_id;
41  theta_ = theta;
42  sign_ = sign;
43  }
44 
46  }
47 
48  unsigned int get_feature_id() const {
49  return feature_id_;
50  }
51 
52  Feature get_theta() const {
53  return theta_;
54  }
55 
56  int get_sign() const {
57  return sign_;
58  }
59 
60  unsigned int score_document(const quickrank::Feature *d) {
61  if (sign_ * d[feature_id_] > sign_ * theta_)
62  return 1;
63  return 0;
64  }
65 
67  return new WeakRanker(feature_id_, theta_, sign_);
68  }
69 
70  private:
71  unsigned int feature_id_ = 0;
72  Feature theta_ = 0.0;
73  int sign_ = 1;
74 
75  friend std::ostream &operator<<(std::ostream &os, const WeakRanker &a) {
76  return a.put(os);
77  }
78 
79  std::ostream &put(std::ostream &os) const {
80  os << "# WeakRanker " << feature_id_ << ":" << theta_ << " (" << ")"
81  << std::endl;
82  return os;
83  }
84 
85 };
86 
87 /// This implements the RankBoost algorithm.
88 ///
89 /// Freund, Y., Iyer, R., Schapire, R. E., & Singer, Y. (2003).
90 /// An efficient boosting algorithm for combining preferences.
91 /// The Journal of machine learning research, 4, 933-969.
92 class Rankboost: public LTR_Algorithm {
93  public:
94  Rankboost(size_t max_wr);
95 
96  Rankboost(const pugi::xml_document &model);
97 
98  virtual ~Rankboost();
99 
100  /// Avoid inefficient copy constructor
101  Rankboost(const Rankboost &other) = delete;
102  /// Avoid inefficient copy assignment
103  Rankboost &operator=(const Rankboost &) = delete;
104 
105  /// Returns the name of the ranker.
106  virtual std::string name() const {
107  return NAME_;
108  }
109 
110  static const std::string NAME_;
111 
112  /// Executes the learning process.
113  ///
114  /// \param training_dataset The training dataset.
115  /// \param validation_dataset The validation training dataset.
116  /// \param metric The metric to be optimized.
117  /// \param partial_save Allows to save a partial model every given number of iterations.
118  /// \param model_filename The file where the model, and the partial models, are saved.
119  virtual void learn(
120  std::shared_ptr<data::Dataset> training_dataset,
121  std::shared_ptr<data::Dataset> validation_dataset,
122  std::shared_ptr<metric::ir::Metric> metric,
123  size_t partial_save,
124  const std::string model_filename);
125 
126  /// Returns the score of a given document.
127  virtual Score score_document(const Feature *d) const;
128 
129  /// Returns the partial scores of a given document, tree.
130  /// \param d is a pointer to the document to be evaluated
131  virtual std::shared_ptr<std::vector<Score>> partial_scores_document(
132  const Feature *d) const;
133 
134  /// Return the xml model representing the current object
135  virtual pugi::xml_document *get_xml_model() const;
136 
137  virtual bool update_weights(std::vector<double> &weights);
138 
139  virtual std::shared_ptr<std::vector<double>> get_weights() const;
140 
141  private:
142  float ***D = NULL;
143  float **PI = NULL;
144  Feature **THETA = NULL;
145  unsigned int *n_theta = NULL;
146  unsigned int ***SDF = NULL;
147  Score *training_scores = NULL;
148  Score *validation_scores = NULL;
149  size_t T;
150  size_t best_T;
152  char const *omp_schedule;
153  WeakRanker **weak_rankers = NULL;
154  float *alphas = NULL;
155  float best_r = 0.0;
156  float max_alpha = 0.0;
157  float r_t = 0.0;
158  float z_t = 1.0;
159 
160  void init(std::shared_ptr<data::Dataset> training_dataset,
161  std::shared_ptr<data::Dataset> validation_dataset);
162  void compute_pi(std::shared_ptr<data::Dataset> dataset);
163  WeakRanker *compute_weak_ranker(std::shared_ptr<data::Dataset> dataset);
164  void update_d
165  (std::shared_ptr<data::Dataset> dataset, WeakRanker *wr, float alpha);
166  MetricScore compute_metric_score(std::shared_ptr<data::Dataset> dataset,
167  std::shared_ptr<quickrank::metric::ir::Metric> scorer);
168  void clean(std::shared_ptr<data::Dataset> dataset);
169 
170 
171  /// The output stream operator.
172 
173  friend std::ostream &operator<<(std::ostream &os, const Rankboost &a) {
174  return a.put(os);
175  }
176 
177  /// Prints the description of Algorithm, including its parameters
178  virtual std::ostream &put(std::ostream &os) const;
179 };
180 } // namespace forests
181 } // namespace learning
182 } // namespace quickrank
183 
Definition: dataset.cc:28
int get_sign() const
Definition: rankboost.h:56
unsigned int feature_id_
Definition: rankboost.h:71
virtual std::ostream & put(std::ostream &os) const
Prints the description of Algorithm, including its parameters.
Definition: rankboost.cc:96
This implements the RankBoost algorithm.
Definition: rankboost.h:92
Definition: ltr_algorithm.h:33
float Feature
data type for instance feature values
Definition: types.h:31
Feature theta_
Definition: rankboost.h:72
size_t T
Definition: rankboost.h:149
unsigned int score_document(const quickrank::Feature *d)
Definition: rankboost.h:60
size_t best_T
Definition: rankboost.h:150
Feature get_theta() const
Definition: rankboost.h:52
static const std::string NAME_
Definition: rankboost.h:110
double Score
data type for instance predicted score
Definition: types.h:30
virtual std::string name() const
Returns the name of the ranker.
Definition: rankboost.h:106
char const * omp_schedule
Definition: rankboost.h:152
~WeakRanker()
Definition: rankboost.h:45
unsigned int get_feature_id() const
Definition: rankboost.h:48
std::ostream & put(std::ostream &os) const
Definition: rankboost.h:79
WeakRanker * clone()
Definition: rankboost.h:66
WeakRanker(unsigned int feature_id, Feature theta, int sign)
Definition: rankboost.h:39
friend std::ostream & operator<<(std::ostream &os, const Rankboost &a)
The output stream operator.
Definition: rankboost.h:173
int sign_
Definition: rankboost.h:73
bool go_parallel
Definition: rankboost.h:151
double MetricScore
data type for the value of an evaluation metric
Definition: types.h:33
friend std::ostream & operator<<(std::ostream &os, const WeakRanker &a)
Definition: rankboost.h:75