theJuniorProgrammer3
committed on
Commit
·
e91765f
1
Parent(s):
2166db3
Add preprocess
Browse files
infer.cpp
CHANGED
|
@@ -12,6 +12,28 @@ using namespace std;
|
|
| 12 |
unordered_map<string, uint32_t> vocab;
|
| 13 |
unordered_map<uint64_t, vector<uint32_t>> hasilToOutput;
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
// === Fungsi: Load Model dari File ===
|
| 16 |
void loadModel(const string &filename) {
|
| 17 |
ifstream file(filename);
|
|
@@ -127,6 +149,7 @@ int main() {
|
|
| 127 |
while (ss >> word) {
|
| 128 |
context.push_back(word);
|
| 129 |
}
|
|
|
|
| 130 |
auto newContext = context;
|
| 131 |
|
| 132 |
string prediction;
|
|
|
|
| 12 |
unordered_map<string, uint32_t> vocab;
|
| 13 |
unordered_map<uint64_t, vector<uint32_t>> hasilToOutput;
|
| 14 |
|
| 15 |
+
/// Normalizes a list of whitespace-separated words in place.
///
/// Every input string is split into tokens:
///   * each maximal run of alphanumeric characters becomes one token,
///     converted to lower case;
///   * every non-alphanumeric character becomes its own single-character
///     token, emitted in its original position.
/// Empty input strings contribute no tokens. The relative order of tokens
/// follows the order of the characters in the input.
///
/// @param theString  Words to normalize; replaced by the resulting tokens.
void preprocess(std::vector<std::string> &theString) {
    std::vector<std::string> tokens;
    tokens.reserve(theString.size());

    // Iterate by const reference: the words are only read, never modified.
    for (const std::string &word : theString) {
        std::string run;  // current run of lower-cased alphanumeric characters

        for (const char c : word) {
            // <cctype> functions require a value representable as
            // unsigned char (or EOF). Passing a negative plain char, e.g. a
            // UTF-8 byte on platforms where char is signed, is undefined.
            const unsigned char uc = static_cast<unsigned char>(c);

            if (std::isalnum(uc)) {
                run += static_cast<char>(std::tolower(uc));
                continue;
            }

            // A separator ends the pending run (if any) and is itself a token.
            if (!run.empty()) {
                tokens.push_back(run);
                run.clear();
            }
            tokens.push_back(std::string(1, c));
        }

        // Flush the run that reaches the end of the word.
        if (!run.empty()) {
            tokens.push_back(run);
        }
    }

    // Hand the result back without copying the token list.
    theString.swap(tokens);
}
|
| 36 |
+
|
| 37 |
// === Fungsi: Load Model dari File ===
|
| 38 |
void loadModel(const string &filename) {
|
| 39 |
ifstream file(filename);
|
|
|
|
| 149 |
while (ss >> word) {
|
| 150 |
context.push_back(word);
|
| 151 |
}
|
| 152 |
+
preprocess(context);
|
| 153 |
auto newContext = context;
|
| 154 |
|
| 155 |
string prediction;
|