File size: 3,660 Bytes
fd49381 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
/*
* FeatureStats.h
* mert - Minimum Error Rate Training
*
* Created by Nicola Bertoldi on 13/05/08.
*
*/
#ifndef MERT_FEATURE_STATS_H_
#define MERT_FEATURE_STATS_H_
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>
#include <boost/unordered_map.hpp>
#include "util/string_piece.hh"
#include "Types.h"
namespace MosesTuning
{
// Minimal sparse vector
class SparseVector
{
public:
typedef std::map<std::size_t,FeatureStatsType> fvector_t;
typedef std::map<std::string, std::size_t> name2id_t;
typedef std::vector<std::string> id2name_t;
FeatureStatsType get(const std::string& name) const;
FeatureStatsType get(std::size_t id) const;
void set(const std::string& name, FeatureStatsType value);
void set(size_t id, FeatureStatsType value);
void clear();
void load(const std::string& file);
std::size_t size() const {
return m_fvector.size();
}
void write(std::ostream& out, const std::string& sep = " ") const;
SparseVector& operator-=(const SparseVector& rhs);
SparseVector& operator+=(const SparseVector& rhs);
FeatureStatsType inner_product(const SparseVector& rhs) const;
// Added by cherryc
std::vector<std::size_t> feats() const;
friend bool operator==(SparseVector const& item1, SparseVector const& item2);
friend std::size_t hash_value(SparseVector const& item);
static std::size_t encode(const std::string& feat);
static std::string decode(std::size_t feat);
// End added by cherryc
private:
static name2id_t m_name_to_id;
static id2name_t m_id_to_name;
fvector_t m_fvector;
};
SparseVector operator-(const SparseVector& lhs, const SparseVector& rhs);
FeatureStatsType inner_product(const SparseVector& lhs, const SparseVector& rhs);
class FeatureStats
{
private:
std::size_t m_available_size;
std::size_t m_entries;
// TODO: Use smart pointer for exceptional-safety.
featstats_t m_array;
SparseVector m_map;
public:
FeatureStats();
explicit FeatureStats(const std::size_t size);
~FeatureStats();
// We intentionally allow copying.
FeatureStats(const FeatureStats &stats);
FeatureStats& operator=(const FeatureStats &stats);
void Copy(const FeatureStats &stats);
bool isfull() const {
return (m_entries < m_available_size) ? 0 : 1;
}
void expand();
void add(FeatureStatsType v);
void addSparse(const std::string& name, FeatureStatsType v);
void clear() {
memset((void*)m_array, 0, GetArraySizeWithBytes());
m_map.clear();
}
void reset() {
m_entries = 0;
clear();
}
FeatureStatsType get(std::size_t i) {
return m_array[i];
}
FeatureStatsType get(std::size_t i)const {
return m_array[i];
}
featstats_t getArray() const {
return m_array;
}
const SparseVector& getSparse() const {
return m_map;
}
void set(std::string &theString, const SparseVector& sparseWeights);
inline std::size_t bytes() const {
return GetArraySizeWithBytes();
}
std::size_t GetArraySizeWithBytes() const {
return m_entries * sizeof(FeatureStatsType);
}
std::size_t size() const {
return m_entries;
}
std::size_t available() const {
return m_available_size;
}
void savetxt(const std::string &file);
void savetxt(std::ostream* os);
void savebin(std::ostream* os);
void savetxt();
void loadtxt(std::istream* is, const SparseVector& sparseWeights);
void loadbin(std::istream* is);
/**
* Write the whole object to a stream.
*/
friend std::ostream& operator<<(std::ostream& o, const FeatureStats& e);
};
bool operator==(const FeatureStats& f1, const FeatureStats& f2);
}
#endif // MERT_FEATURE_STATS_H_
|