QuickRank  v2.0
QuickRank: A C++ suite of Learning to Rank algorithms
ltr_algorithm.h
Go to the documentation of this file.
1 /*
2  * QuickRank - A C++ suite of Learning to Rank algorithms
3  * Webpage: http://quickrank.isti.cnr.it/
4  * Contact: quickrank@isti.cnr.it
5  *
6  * Unless explicitly acquired and licensed from Licensor under another
7  * license, the contents of this file are subject to the Reciprocal Public
8  * License ("RPL") Version 1.5, or subsequent versions as allowed by the RPL,
9  * and You may not copy or use this file in either source code or executable
10  * form, except in compliance with the terms and conditions of the RPL.
11  *
12  * All software distributed under the RPL is provided strictly on an "AS
13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, AND
14  * LICENSOR HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
15  * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
16  * PURPOSE, QUIET ENJOYMENT, OR NON-INFRINGEMENT. See the RPL for specific
17  * language governing rights and limitations under the RPL.
18  *
19  * Contributor:
20  * HPC. Laboratory - ISTI - CNR - http://hpc.isti.cnr.it/
21  */
22 #pragma once
23 
24 #include <memory>
25 
26 #include "data/dataset.h"
27 #include "metric/ir/metric.h"
28 #include "pugixml/src/pugixml.hpp"
29 
30 namespace quickrank {
31 namespace learning {
32 
34 
35  public:
37  }
38 
39  /// Generates a LTR_Algorithm instance from a previously saved XML model.
40  LTR_Algorithm(const pugi::xml_document &model);
41 
42  virtual ~LTR_Algorithm() {
43  }
44 
45  /// Avoid inefficient copy constructor
46  LTR_Algorithm(const LTR_Algorithm &other) = delete;
47  /// Avoid inefficient copy assignment
48  LTR_Algorithm &operator=(const LTR_Algorithm &) = delete;
49 
50  /// Returns the name of the ranker.
51  virtual std::string name() const = 0;
52 
53  /// Executes the learning process.
54  ///
55  /// \param training_dataset The training dataset.
56  /// \param validation_dataset The validation dataset.
57  /// \param metric The metric to be optimized.
58  /// \param partial_save Allows saving a partial model every given number of iterations.
59  /// \param model_filename The file where the model, and the partial models, are saved.
60  virtual void learn(std::shared_ptr<data::Dataset> training_dataset,
61  std::shared_ptr<data::Dataset> validation_dataset,
62  std::shared_ptr<metric::ir::Metric> metric,
63  size_t partial_save,
64  const std::string model_filename) = 0;
65 
66  /// Given an input \a dataset, the current ranker generates
67  /// scores for each instance and stores them in the \a scores vector.
68  ///
69  /// \param dataset The dataset to be scored.
70  /// \param scores The vector where scores are stored.
71  /// \note Before scoring it invokes the function \a preprocess_dataset.
72  /// Usually this does not need to be overridden.
73  virtual void score_dataset(std::shared_ptr<data::Dataset> dataset,
74  Score *scores) const;
75 
76  /// Returns the score of a given document.
77  /// \param d is a pointer to the document to be evaluated
78  /// \note Each algorithm has a different implementation.
79  virtual Score score_document(const Feature *d) const = 0;
80 
81  /// Returns the partial score of a given document, tree by tree.
82  /// \param d is a pointer to the document to be evaluated
83  /// \return The vector of partial scores; this default implementation returns a null pointer.
84  /// \note Each algorithm has a different implementation.
85  virtual std::shared_ptr<std::vector<Score>> partial_scores_document(
86  const Feature *d) const {
87  return nullptr;
88  }
89 
90  /// Save the current model to the output_file.
91  ///
92  /// \param model_filename The output file name.
93  /// \param suffix The suffix used to identify partial model saves.
94  virtual void save(std::string model_filename, int suffix = -1) const;
95 
96  /// Load a model from a given XML file.
97  ///
98  /// \param model_filename The input file name.
99  static std::shared_ptr<LTR_Algorithm> load_model_from_file(
100  std::string model_filename);
101 
102  /// Return the xml model representing the current object
103  virtual pugi::xml_document *get_xml_model() const = 0;
104 
105  /// Print additional statistics.
106  ///
107  /// At the moment this includes only the number of comparisons for tree-based algorithms.
108  virtual void print_additional_stats(void) const {
109  }
110 
111  /// Update the weights for the ensemble models (only).
112  ///
113  /// The default implementation does nothing and returns false (default for non-ensemble models).
114  virtual bool update_weights(std::shared_ptr<std::vector<double>> weights) {
115  return false;
116  }
117 
118  /// Return the weights for the ensemble models (only).
119  ///
120  /// The default implementation returns a null pointer (default for non-ensemble models).
121  virtual std::shared_ptr<std::vector<double>> get_weights() const {
122  return nullptr;
123  }
124 
125  private:
126 
127  /// The output stream operator.
128  friend std::ostream &operator<<(std::ostream &os, const LTR_Algorithm &a) {
129  return a.put(os);
130  }
131 
132  /// Prints the description of Algorithm, including its parameters
133  virtual std::ostream &put(std::ostream &os) const = 0;
134 
135 };
136 
137 } // namespace learning
138 } // namespace quickrank
Definition: dataset.cc:28
friend std::ostream & operator<<(std::ostream &os, const LTR_Algorithm &a)
The output stream operator.
Definition: ltr_algorithm.h:128
virtual void score_dataset(std::shared_ptr< data::Dataset > dataset, Score *scores) const
Given an input dataset, the current ranker generates scores for each instance and stores them in the scores vector.
Definition: ltr_algorithm.cc:42
LTR_Algorithm()
Definition: ltr_algorithm.h:36
virtual Score score_document(const Feature *d) const =0
Returns the score of a given document.
virtual void save(std::string model_filename, int suffix=-1) const
Save the current model to the output_file.
Definition: ltr_algorithm.cc:51
Definition: ltr_algorithm.h:33
virtual void print_additional_stats(void) const
Print additional statistics.
Definition: ltr_algorithm.h:108
virtual std::shared_ptr< std::vector< double > > get_weights() const
Return the weights for the ensemble models (only).
Definition: ltr_algorithm.h:121
float Feature
data type for instance feature values
Definition: types.h:31
virtual void learn(std::shared_ptr< data::Dataset > training_dataset, std::shared_ptr< data::Dataset > validation_dataset, std::shared_ptr< metric::ir::Metric > metric, size_t partial_save, const std::string model_filename)=0
Executes the learning process.
double Score
data type for instance predicted score
Definition: types.h:30
static std::shared_ptr< LTR_Algorithm > load_model_from_file(std::string model_filename)
Load a model from a given XML file.
Definition: ltr_algorithm.cc:64
virtual std::string name() const =0
Returns the name of the ranker.
virtual pugi::xml_document * get_xml_model() const =0
Return the xml model representing the current object.
LTR_Algorithm & operator=(const LTR_Algorithm &)=delete
Avoid inefficient copy assignment.
virtual std::ostream & put(std::ostream &os) const =0
Prints the description of Algorithm, including its parameters.
virtual ~LTR_Algorithm()
Definition: ltr_algorithm.h:42
virtual bool update_weights(std::shared_ptr< std::vector< double >> weights)
Update the weights for the ensemble models (only).
Definition: ltr_algorithm.h:114
virtual std::shared_ptr< std::vector< Score > > partial_scores_document(const Feature *d) const
Returns the partial score of a given document, tree by tree.
Definition: ltr_algorithm.h:85