13 #ifndef MLPACK_CORE_DATA_STRING_ENCODING_POLICIES_DICTIONARY_ENCODING_POLICY_HPP
14 #define MLPACK_CORE_DATA_STRING_ENCODING_POLICIES_DICTIONARY_ENCODING_POLICY_HPP
55 template<
typename MatType>
57 const size_t datasetSize,
58 const size_t maxNumTokens,
61 output.zeros(maxNumTokens, datasetSize);
76 template<
typename MatType>
82 output(index, line) = value;
96 template<
typename ElemType>
97 static void Encode(std::vector<ElemType>& output,
size_t value)
99 output.push_back(value);
117 template<
typename Archive>
144 template<
typename TokenType>
This is a template struct that provides some information about various encoding policies.
static void PreprocessToken(const size_t, const size_t, const size_t)
The function is not used by the dictionary encoding policy.
static const bool onePassEncoding
Indicates if the policy is able to encode the token at once without any information about other token...
static void Encode(std::vector< ElemType > &output, size_t value)
The function performs the dictionary encoding algorithm, i.e. it appends the encoded token to the output vector.
The core includes that mlpack expects; standard C++ includes and Armadillo.
The class translates a set of strings into numbers using various encoding algorithms.
static void Encode(MatType &output, const size_t value, const size_t line, const size_t index)
The function performs the dictionary encoding algorithm, i.e. it writes the encoded token into the output matrix at position (index, line).
This class provides a dictionary interface for the purpose of string encoding.
static void Reset()
Clear the necessary internal variables.
DictionaryEncodingPolicy is used as a helper class for StringEncoding.
static void InitMatrix(MatType &output, const size_t datasetSize, const size_t maxNumTokens, const size_t)
The function initializes the output matrix.
void serialize(Archive &, const unsigned int)
Serialize the class to the given archive.