QuickRank  v2.0
QuickRank: A C++ suite of Learning to Rank algorithms
vertical_dataset.h
Go to the documentation of this file.
1 /*
2  * QuickRank - A C++ suite of Learning to Rank algorithms
3  * Webpage: http://quickrank.isti.cnr.it/
4  * Contact: quickrank@isti.cnr.it
5  *
6  * Unless explicitly acquired and licensed from Licensor under another
7  * license, the contents of this file are subject to the Reciprocal Public
8  * License ("RPL") Version 1.5, or subsequent versions as allowed by the RPL,
9  * and You may not copy or use this file in either source code or executable
10  * form, except in compliance with the terms and conditions of the RPL.
11  *
12  * All software distributed under the RPL is provided strictly on an "AS
13  * IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, AND
14  * LICENSOR HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
15  * LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
16  * PURPOSE, QUIET ENJOYMENT, OR NON-INFRINGEMENT. See the RPL for specific
17  * language governing rights and limitations under the RPL.
18  *
19  * Contributor:
20  * HPC. Laboratory - ISTI - CNR - http://hpc.isti.cnr.it/
21  */
22 #pragma once
23 
24 #include <iostream>
25 #include <memory>
26 #include <vector>
27 
28 #include "types.h"
29 #include "data/queryresults.h"
30 #include "dataset.h"
31 
32 namespace quickrank {
33 namespace data {
34 
35 /**
36  * This class implements a Dataset to be used for a L-t-R task.
37  *
38  * The internal representation is vertical: a row vector
39  * of size \a num_instances() x \a num_features().
40  * (A training instance is indeed a document.)
41  * The internal representation can be accessed
42  * directly through the function \a at(),
43  * to support fast access and custom high-performance implementations.
44  * Representation is vertical, i.e., a matrix features x documents.
45  */
47  public:
48 
49  /// Allocates a vertical dataset by copying and transposing a horizontal one.
50  ///
51  /// \param h_dataset The horizontal dataset.
52  VerticalDataset(std::shared_ptr<Dataset> h_dataset);
53  virtual ~VerticalDataset();
54 
55  /// Avoid inefficient copy constructor
56  VerticalDataset(const VerticalDataset &other) = delete;
57  /// Avoid inefficient copy assignment
58  VerticalDataset &operator=(const VerticalDataset &) = delete;
59 
60  /// Returns a pointer to a specific data item.
61  ///
62  /// \param document_id The document of interest.
63  /// \param feature_id The feature of interest.
64  /// \returns A pointer to the requested feature value of the given document.
65  quickrank::Feature *at(size_t document_id, size_t feature_id) {
66  return data_ + (feature_id * num_instances_ + document_id);  // feature-major layout: num_instances_ consecutive values per feature
67  }
68 
69  /// Returns the relevance label of the document with the given \a document_id.
70  Label getLabel(size_t document_id) const {
71  return labels_[document_id];  // read-only lookup by document index; no bounds check is performed
72  }
73 
74  /// Returns the offset in the internal data structures of the i-th query results list.
75  ///
76  /// \param i The i-th query results list of interest.
77  /// \returns The offset of the first document in the i-th query results list.
78  /// This can be used to later invoke the \a at() function.
79  unsigned int offset(size_t i) const {
80  return static_cast<unsigned int>(offsets_[i]);  // offsets_ stores size_t; narrowed to the declared return type
81  }
82 
83  /// Returns the i-th QueryResults in the dataset.
84  ///
85  /// \param i The i-th query results list of interest.
86  /// \returns The requested QueryResults.
87  std::unique_ptr<QueryResults> getQueryResults(size_t i) const;
88 
89  /// Returns the number of features used to represent a document.
90  unsigned int num_features() const {
91  return static_cast<unsigned int>(num_features_);  // narrowed to the declared return type
92  }
93  /// Returns the number of queries in the dataset.
94  unsigned int num_queries() const {
95  return static_cast<unsigned int>(num_queries_);  // stored as size_t; narrowed to the declared return type
96  }
97  /// Returns the number of documents in the dataset.
98  unsigned int num_instances() const {
99  return static_cast<unsigned int>(num_instances_);  // narrowed to the declared return type
100  }
101 
102  private:
103 
105  size_t num_queries_;
107 
110  std::vector<size_t> offsets_;
111 
112  /// The output stream operator.
113  /// Prints the data reading time stats
114  friend std::ostream &operator<<(std::ostream &os, const VerticalDataset &me) {
115  return me.put(os);  // forwards to put() and hands back the stream that put() returns
116  }
117 
118  /// Prints the data reading time stats
119  virtual std::ostream &put(std::ostream &os) const;
120 
121 };
122 
123 } // namespace data
124 } // namespace quickrank
125 
size_t num_queries_
Definition: vertical_dataset.h:105
Definition: dataset.cc:28
friend std::ostream & operator<<(std::ostream &os, const VerticalDataset &me)
The output stream operator.
Definition: vertical_dataset.h:114
quickrank::Feature * data_
Definition: vertical_dataset.h:108
unsigned int num_instances() const
Returns the number of documents in the dataset.
Definition: vertical_dataset.h:98
unsigned int offset(size_t i) const
Returns the offset in the internal data structures of the i-th query results list.
Definition: vertical_dataset.h:79
float Feature
data type for instance predicted label
Definition: types.h:31
quickrank::Label * labels_
Definition: vertical_dataset.h:109
unsigned int num_features() const
Returns the number of features used to represent a document.
Definition: vertical_dataset.h:90
std::unique_ptr< QueryResults > getQueryResults(size_t i) const
Returns the i-th QueryResults in the dataset.
Definition: vertical_dataset.cc:77
virtual ~VerticalDataset()
Definition: vertical_dataset.cc:69
Label getLabel(size_t document_id)
Returns the value of the i-th relevance label.
Definition: vertical_dataset.h:70
quickrank::Feature * at(size_t document_id, size_t feature_id)
Returns a pointer to a specific data item.
Definition: vertical_dataset.h:65
virtual std::ostream & put(std::ostream &os) const
Prints the data reading time stats.
Definition: vertical_dataset.cc:88
This class implements a Dataset to be used for a L-t-R task.
Definition: vertical_dataset.h:46
VerticalDataset(std::shared_ptr< Dataset > h_dataset)
Allocates a vertical dataset by copying and transposing an horizontal one.
Definition: vertical_dataset.cc:30
std::vector< size_t > offsets_
Definition: vertical_dataset.h:110
size_t num_features_
Definition: vertical_dataset.h:104
VerticalDataset & operator=(const VerticalDataset &)=delete
Avoid inefficient copy assignment.
unsigned int num_queries() const
Returns the number of queries in the dataset.
Definition: vertical_dataset.h:94
size_t num_instances_
Definition: vertical_dataset.h:106
float Label
Definition: types.h:29