File size: 4,050 Bytes
1911cc4 1ed2045 1911cc4 e91765f 1911cc4 e91765f 1911cc4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
#include <algorithm>
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace std;
// Word -> numeric ID table. Filled by loadModel() from the "Vocabs:" section
// of the model file and read by inferNextWord().
unordered_map<string, uint32_t> vocab;
// Context key -> list of candidate next-word IDs ("hasil" is Indonesian for
// "result"). Filled by loadModel() from the "Matchs:" section; inferNextWord()
// looks up the position-weighted sum of the context's word IDs in this table.
unordered_map<uint64_t, vector<uint32_t>> hasilToOutput;
// Tokenises the given strings in place.
//
// Every input string is scanned byte by byte:
//   * runs of alphanumeric characters become one lower-cased token;
//   * every non-alphanumeric character becomes its own one-character token.
// Tokens never span two input strings, and empty input strings yield nothing.
//
// theString: in/out parameter; on return it holds the flat token list in
//            input order.
void preprocess(std::vector<std::string> &theString) {
    std::vector<std::string> tokens;
    tokens.reserve(theString.size());

    for (const std::string &raw : theString) {
        std::string word;
        for (const char ch : raw) {
            // <cctype> functions require a value representable as unsigned
            // char (or EOF); a negative plain char would be undefined
            // behaviour, so convert before classifying.
            const auto uch = static_cast<unsigned char>(ch);
            if (std::isalnum(uch)) {
                word += static_cast<char>(std::tolower(uch));
                continue;
            }
            // Separator: flush the pending word, then emit the separator.
            if (!word.empty()) {
                tokens.push_back(std::move(word));
                word.clear();  // bring the moved-from string back to a known state
            }
            tokens.emplace_back(1, ch);
        }
        if (!word.empty()) {
            tokens.push_back(std::move(word));
        }
    }

    theString = std::move(tokens);
}
// === Function: load the model from a file ===
//
// Parses a text model into the global `vocab` and `hasilToOutput` tables.
// The tables are not cleared first; repeated words/keys overwrite earlier
// entries.
//
// Recognised layout (one item per line):
//   Vocabs:            <- switches to the vocabulary section
//   <word>: <id>       <- lines without ": " are skipped
//   Matchs:            <- switches to the match section
//   <key>:             <- only lines ending in ':' start an entry
//   [                  <- the line right after the key is consumed unchecked
//   <id>               <- one target ID per line, blank lines skipped
//   ]                  <- ends the entry (end of file also ends it)
// Lines that appear before any section header are ignored.
//
// filename: path of the model file.
//
// Exits the process with status 1 if the file cannot be opened.
// std::stoul / std::stoull exceptions (std::invalid_argument,
// std::out_of_range) propagate when a numeric field is malformed.
void loadModel(const std::string &filename) {
    std::ifstream file(filename);
    if (!file) {
        std::cerr << "Gagal membuka file model.\n";
        std::exit(1);
    }

    enum class Section { None, Vocab, Match };
    Section section = Section::None;

    std::string line;
    while (std::getline(file, line)) {
        if (line == "Vocabs:") {
            section = Section::Vocab;
            continue;
        }
        if (line == "Matchs:") {
            section = Section::Match;
            continue;
        }

        if (section == Section::Vocab) {
            const std::size_t pos = line.find(": ");
            if (pos == std::string::npos) {
                continue;
            }
            // IDs are unsigned 32-bit: parse as unsigned so values above
            // INT_MAX are accepted instead of throwing.
            vocab[line.substr(0, pos)] =
                static_cast<uint32_t>(std::stoul(line.substr(pos + 2)));
        } else if (section == Section::Match) {
            // Guard against blank lines: back() on an empty string is
            // undefined behaviour.
            if (line.empty() || line.back() != ':') {
                continue;
            }
            const uint64_t key = std::stoull(line.substr(0, line.size() - 1));

            std::getline(file, line);  // skip the opening "[" line

            std::vector<uint32_t> targets;
            while (std::getline(file, line) && line != "]") {
                if (!line.empty()) {
                    targets.push_back(static_cast<uint32_t>(std::stoul(line)));
                }
            }
            hasilToOutput[key] = std::move(targets);
        }
    }
    // `file` is closed by its destructor.
}
// === Function: predict the next word ===
//
// Predicts the word that follows `contextWords`, using the global `vocab`
// and `hasilToOutput` tables.
//
// Steps:
//   1. Build a context key: the sum of each word's ID multiplied by its
//      1-based position in the context.
//   2. Look the key up in `hasilToOutput`; if it is absent, fall back to the
//      entry whose key is numerically closest.
//   3. Pick the ID that occurs most often in that entry's candidate list
//      (ties are resolved by the unordered_map iteration order, which is
//      unspecified).
//   4. Map the ID back to its word by scanning `vocab`.
//
// Returns the predicted word, or one of these marker strings:
//   "<unknown word: W>"  - context word W is not in `vocab`
//   "<no prediction>"    - `hasilToOutput` is empty, or the selected entry
//                          has no candidates
//   "<not found>"        - the chosen ID has no word in `vocab`
std::string inferNextWord(const std::vector<std::string> &contextWords) {
    uint64_t total = 0;
    for (std::size_t i = 0; i < contextWords.size(); ++i) {
        const auto entry = vocab.find(contextWords[i]);
        if (entry == vocab.end()) {
            return "<unknown word: " + contextWords[i] + ">";
        }
        // Position weight; widen first so the product is computed in 64 bits.
        total += static_cast<uint64_t>(entry->second) * (i + 1);
    }

    auto match = hasilToOutput.find(total);
    if (match == hasilToOutput.end()) {
        // No exact key: take the numerically closest one. The iterator
        // itself records whether anything was found, so no magic distance
        // value is needed.
        uint64_t minDiff = 0;
        for (auto it = hasilToOutput.begin(); it != hasilToOutput.end(); ++it) {
            const uint64_t diff =
                (it->first > total) ? it->first - total : total - it->first;
            if (match == hasilToOutput.end() || diff < minDiff) {
                minDiff = diff;
                match = it;
            }
        }
        if (match == hasilToOutput.end()) {
            return "<no prediction>";
        }
    }

    const std::vector<uint32_t> &candidates = match->second;
    if (candidates.empty()) {
        // An entry with an empty list would make max_element return end();
        // dereferencing that is undefined behaviour.
        return "<no prediction>";
    }

    // Alternative kept from the original: sample a candidate at random.
    // uint32_t predictedID = candidates[rand() % candidates.size()]
    std::unordered_map<uint32_t, int> freq;
    for (const uint32_t id : candidates) {
        ++freq[id];
    }
    const auto best = std::max_element(
        freq.begin(), freq.end(),
        [](const auto &a, const auto &b) { return a.second < b.second; });
    const uint32_t predictedID = best->first;

    // Map the ID back to its word.
    for (const auto &[word, id] : vocab) {
        if (id == predictedID) {
            return word;
        }
    }
    return "<not found>";
}
// === Main ===
//
// Loads "model.txt", reads one line of context from standard input,
// tokenises it with preprocess(), then repeatedly appends the word returned
// by inferNextWord() until one of the following happens:
//   * the end token "[AKHIR]" is produced,
//   * inferNextWord() returns a marker ("<unknown word: ...>",
//     "<no prediction>", "<not found>"),
//   * 50 words have been generated.
// The context followed by the generated words is printed on one line.
int main() {
    // Seeds the C PRNG; no active rand() call is visible in this file.
    std::srand(static_cast<unsigned>(std::time(nullptr)));
    loadModel("model.txt");

    std::cout << "Masukkan kalimat sebagai konteks:\n";
    std::string words;
    std::getline(std::cin, words);

    std::vector<std::string> context;
    std::istringstream ss(words);
    for (std::string word; ss >> word;) {
        context.push_back(word);
    }
    preprocess(context);

    constexpr int kMaxGenerated = 50;
    const std::string kEndToken = "[AKHIR]";

    for (int i = 0; i < kMaxGenerated; ++i) {
        std::string prediction = inferNextWord(context);

        // Marker strings are not vocabulary words. Feeding one back as
        // context would only yield "<unknown word: <marker>>" on every later
        // iteration, so generation stops as soon as one appears.
        const bool isMarker = prediction.rfind("<unknown word:", 0) == 0 ||
                              prediction == "<no prediction>" ||
                              prediction == "<not found>";
        const bool isEnd = (prediction == kEndToken);

        context.push_back(std::move(prediction));
        if (isMarker || isEnd) {
            break;
        }
    }

    // A marker, if any, is always the last element, so everything is printed.
    std::cout << "Prediksi kata berikutnya:";
    for (const std::string &token : context) {
        std::cout << " " << token;
    }
    std::cout << std::endl;
    return 0;
}
|