QuickRank  v2.0
QuickRank: A C++ suite of Learning to Rank algorithms
cleaver.h
Go to the documentation of this file.
1 /*
2  * QuickRank - A C++ suite of Learning to Rank algorithms
3  * Webpage: http://quickrank.isti.cnr.it/
4  * Contact: quickrank@isti.cnr.it
5  *
6  * Unless explicitly acquired and licensed from Licensor under another
7  * license, the contents of this file are subject to the Reciprocal Public
8  * License ("RPL") Version 1.5, or subsequent versions as allowed by the RPL,
9  * and You may not copy or use this file in either source code or executable
10  * form, except in compliance with the terms and conditions of the RPL.
11  *
12  * All software distributed under the RPL is provided strictly on an "AS
13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, AND
14  * LICENSOR HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
15  * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
16  * PURPOSE, QUIET ENJOYMENT, OR NON-INFRINGEMENT. See the RPL for specific
17  * language governing rights and limitations under the RPL.
18  *
19  * Contributors:
20  * - Salvatore Trani(salvatore.trani@isti.cnr.it)
21  */
22 #pragma once
23 
24 #include <memory>
25 #include <set>
26 
27 #include "data/dataset.h"
28 #include "metric/ir/metric.h"
29 #include "learning/ltr_algorithm.h"
33 #include "pugixml/src/pugixml.hpp"
34 
35 namespace quickrank {
36 namespace optimization {
37 namespace post_learning {
38 namespace pruning {
39 
40 /// This implements various strategies for pruning ensembles.
41 /// This optimization algorithm expects the datasets to be in the partial
42 /// scores format (i.e., a column for each ensemble, with the partial score
43 /// returned by that ensemble on each document (row of the original dataset)).
45 
46  public:
47 
48  enum class PruningMethod {
50  };
51 
52  Cleaver(double pruning_rate);
53 
54  Cleaver(double pruning_rate,
55  std::shared_ptr<learning::linear::LineSearch> lineSearch);
56 
57  Cleaver(const pugi::xml_document &model);
58 
59  /// Returns the name of the optimizer.
60  std::string name() const {
61  return NAME_;
62  }
63 
64  /// Returns the pruning method of the algorithm.
65  virtual PruningMethod pruning_method() const = 0;
66 
67  virtual bool line_search_pre_pruning() const = 0;
68 
69  virtual bool need_partial_score_dataset() const {
70  return true;
71  };
72 
73  virtual void pruning(std::set<unsigned int> &pruned_estimators,
74  std::shared_ptr<data::Dataset> dataset,
75  std::shared_ptr<metric::ir::Metric> scorer) = 0;
76 
77  void optimize(std::shared_ptr<quickrank::learning::LTR_Algorithm> algo,
78  std::shared_ptr<quickrank::data::Dataset> training_dataset,
79  std::shared_ptr<quickrank::data::Dataset> validation_dataset,
80  std::shared_ptr<quickrank::metric::ir::Metric> metric,
81  size_t partial_save,
82  const std::string model_filename);
83 
84  /// Process the dataset filtering out features with 0-weight
85  virtual std::shared_ptr<data::Dataset> filter_dataset(
86  std::shared_ptr<data::Dataset> dataset,
87  std::set<unsigned int> &pruned_estimators) const;
88 
89  /// Return the xml model representing the current object
90  virtual pugi::xml_document *get_xml_model() const;
91 
92  static const std::vector<std::string> pruningMethodNames;
93 
94  static PruningMethod getPruningMethod(std::string name) {
95  auto i_item = std::find(pruningMethodNames.cbegin(),
96  pruningMethodNames.cend(),
97  name);
98  if (i_item != pruningMethodNames.cend()) {
99 
100  return PruningMethod(std::distance(pruningMethodNames.cbegin(), i_item));
101  }
102 
103  // TODO: Fix return value...
104  throw std::invalid_argument("pruning method " + name + " is not valid");
105 // return NULL;
106  }
107 
108  static std::string getPruningMethod(PruningMethod pruningMethod) {
109  return pruningMethodNames[static_cast<int>(pruningMethod)];
110  }
111 
112  /// Returns the learned weights
113  virtual std::vector<float> &get_weigths() {
114  return weights_;
115  }
116 
117  static const std::string NAME_;
118 
119  protected:
121  unsigned int estimators_to_prune_;
122  unsigned int estimators_to_select_;
123  std::shared_ptr<learning::linear::LineSearch> lineSearch_;
124 
125  std::vector<float> weights_;
126 
127  /// Prints the description of Algorithm, including its parameters
128  std::ostream &put(std::ostream &os) const;
129 
130  virtual void score(data::Dataset *dataset, Score *scores) const;
131 
132  virtual void import_weights_from_line_search(
133  std::set<unsigned int> &pruned_estimators);
134 };
135 
136 } // namespace pruning
137 } // namespace post_learning
138 } // namespace optimization
139 } // namespace quickrank
Definition: dataset.cc:28
virtual bool need_partial_score_dataset() const
Definition: cleaver.h:69
std::string name() const
Returns the name of the optimizer.
Definition: cleaver.h:60
virtual std::shared_ptr< data::Dataset > filter_dataset(std::shared_ptr< data::Dataset > dataset, std::set< unsigned int > &pruned_estimators) const
Process the dataset filtering out features with 0-weight.
Definition: cleaver.cc:315
static PruningMethod getPruningMethod(std::string name)
Definition: cleaver.h:94
This class implements a Dataset to be used for a L-t-R task.
Definition: dataset.h:45
Cleaver(double pruning_rate)
Definition: cleaver.cc:45
virtual void score(data::Dataset *dataset, Score *scores) const
Definition: cleaver.cc:287
virtual std::vector< float > & get_weigths()
Returns the learned weights.
Definition: cleaver.h:113
static const std::string NAME_
Definition: cleaver.h:117
std::ostream & put(std::ostream &os) const
Prints the description of Algorithm, including its parameters.
Definition: cleaver.cc:89
static std::string getPruningMethod(PruningMethod pruningMethod)
Definition: cleaver.h:108
virtual void pruning(std::set< unsigned int > &pruned_estimators, std::shared_ptr< data::Dataset > dataset, std::shared_ptr< metric::ir::Metric > scorer)=0
unsigned int estimators_to_select_
Definition: cleaver.h:122
double Score
data type for instance truth label
Definition: types.h:30
void optimize(std::shared_ptr< quickrank::learning::LTR_Algorithm > algo, std::shared_ptr< quickrank::data::Dataset > training_dataset, std::shared_ptr< quickrank::data::Dataset > validation_dataset, std::shared_ptr< quickrank::metric::ir::Metric > metric, size_t partial_save, const std::string model_filename)
Definition: cleaver.cc:102
virtual PruningMethod pruning_method() const =0
Returns the pruning method of the algorithm.
std::vector< float > weights_
Definition: cleaver.h:125
static const std::vector< std::string > pruningMethodNames
Definition: cleaver.h:92
virtual void import_weights_from_line_search(std::set< unsigned int > &pruned_estimators)
Definition: cleaver.cc:301
virtual pugi::xml_document * get_xml_model() const
Return the xml model representing the current object.
Definition: cleaver.cc:248
std::shared_ptr< learning::linear::LineSearch > lineSearch_
Definition: cleaver.h:123
This implements various strategies for pruning ensembles.
Definition: cleaver.h:44
unsigned int estimators_to_prune_
Definition: cleaver.h:121