13 #ifndef MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP
14 #define MLPACK_CORE_DATA_STRING_ENCODING_DICTIONARY_HPP
17 #include <mlpack/core/boost_backport/boost_backport_string_view.hpp>
18 #include <unordered_map>
31 template<
typename Token>
36 using MapType = std::unordered_map<Token, size_t>;
48 return mapping.find(token) != mapping.end();
61 size_t size = mapping.size();
63 mapping[std::forward<T>(token)] = ++size;
74 size_t Value(
const Token& token)
const
76 return mapping.at(token);
//! Get the size of the dictionary, i.e. the number of entries currently
//! stored in `mapping`.
80 size_t Size()
const {
return mapping.size(); }
96 template<
typename Archive>
99 ar & BOOST_SERIALIZATION_NVP(mapping);
115 using MapType = std::unordered_map<
118 boost::hash<boost::string_view>>;
131 mapping[token] = other.mapping.at(token);
140 tokens = other.tokens;
144 mapping[token] = other.mapping.at(token);
158 bool HasToken(
const boost::string_view token)
const
160 return mapping.find(token) != mapping.end();
172 tokens.emplace_back(token);
174 size_t size = mapping.size();
176 mapping[tokens.back()] = ++size;
187 size_t Value(
const boost::string_view token)
const
189 return mapping.at(token);
//! Get the size of the dictionary, i.e. the number of entries currently
//! stored in `mapping`.
193 size_t Size()
const {
return mapping.size(); }
//! Get the tokens: returns a read-only reference to the `tokens` deque.
203 const std::deque<std::string>&
Tokens()
const {
return tokens; }
//! Modify the tokens: returns a mutable reference to the `tokens` deque.
//! NOTE(review): the map is keyed by boost::string_view and AddToken appears
//! to insert `tokens.back()` as the key, so the keys presumably view into these
//! strings. Editing or removing tokens through this reference may leave
//! `mapping` with stale keys -- confirm before relying on it.
205 std::deque<std::string>&
Tokens() {
return tokens; }
215 template<
typename Archive>
218 size_t numTokens = tokens.size();
220 ar & BOOST_SERIALIZATION_NVP(numTokens);
222 if (Archive::is_loading::value)
224 tokens.resize(numTokens);
228 ar & BOOST_SERIALIZATION_NVP(token);
230 size_t tokenValue = 0;
231 ar & BOOST_SERIALIZATION_NVP(tokenValue);
232 mapping[token] = tokenValue;
235 if (Archive::is_saving::value)
239 ar & BOOST_SERIALIZATION_NVP(token);
241 size_t tokenValue = mapping.at(token);
242 ar & BOOST_SERIALIZATION_NVP(tokenValue);
249 std::deque<std::string> tokens;
260 using MapType = std::array<size_t, 1 << CHAR_BIT>;
280 return mapping[token] > 0;
293 mapping[token] = ++size;
307 return mapping[token];
330 template<
typename Archive>
333 ar & BOOST_SERIALIZATION_NVP(mapping);
334 ar & BOOST_SERIALIZATION_NVP(size);
size_t AddToken(const int token)
The function adds the given token to the dictionary and assigns a label to the token.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
size_t AddToken(T &&token)
The function adds the given token to the dictionary and assigns a label to the token.
const MapType & Mapping() const
Get the mapping.
MapType & Mapping()
Modify the mapping.
MapType & Mapping()
Modify the mapping.
StringEncodingDictionary()
Default-construct the dictionary.
const std::deque< std::string > & Tokens() const
Get the tokens.
The core includes that mlpack expects; standard C++ includes and Armadillo.
const MapType & Mapping() const
Get the mapping.
void Clear()
Clear the dictionary.
size_t Size() const
Get the size of the dictionary.
std::array< size_t, 1<< CHAR_BIT > MapType
A convenient alias for the internal type of the map.
const MapType & Mapping() const
Get the mapping.
void serialize(Archive &ar, const unsigned int)
Serialize the class to the given archive.
This class provides a dictionary interface for the purpose of string encoding.
boost::string_view TokenType
The type of the token that the dictionary stores.
size_t Size() const
Get the size of the dictionary.
StringEncodingDictionary(const StringEncodingDictionary &other)
Copy-construct the dictionary from the given object.
void Clear()
Clear the dictionary.
std::deque< std::string > & Tokens()
Modify the tokens.
bool HasToken(const Token &token) const
The function returns true if the dictionary contains the given token.
MapType & Mapping()
Modify the mapping.
size_t Value(const Token &token) const
The function returns the label assigned to the given token.
Token TokenType
The type of the token that the dictionary stores.
bool HasToken(const boost::string_view token) const
The function returns true if the dictionary contains the given token.
bool HasToken(const int token) const
The function returns true if the dictionary contains the given token.
int TokenType
The type of the token that the dictionary stores.
size_t AddToken(const boost::string_view token)
The function adds the given token to the dictionary and assigns a label to the token.
size_t Value(const int token) const
The function returns the label assigned to the given token.
void Clear()
Clear the dictionary.
std::unordered_map< boost::string_view, size_t, boost::hash< boost::string_view >> MapType
A convenient alias for the internal type of the map.
size_t Value(const boost::string_view token) const
The function returns the label assigned to the given token.
size_t Size() const
Get the size of the dictionary.
std::unordered_map< Token, size_t > MapType
A convenient alias for the internal type of the map.
src mlpack core util version hpp VERSION_HPP_CONTENTS string(REGEX REPLACE".*#define MLPACK_VERSION_MAJOR ([0-9]+).*""\\1"MLPACK_VERSION_MAJOR"${VERSION_HPP_CONTENTS}") string(REGEX REPLACE".* MLPACK_VERSION_MINOR "$
StringEncodingDictionary & operator=(const StringEncodingDictionary &other)
Copy-assign the dictionary from the given object.