#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include "NMTWrapper.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; // Link with Ws2_32.lib #pragma comment(lib, "Ws2_32.lib") // Dedicated port (avoids clashing with HTTP/dev servers on 8080) #define DEFAULT_PORT "18080" // Max UTF-8 request body (line-delimited); must match or exceed client send size #define MAX_REQUEST_BYTES (256 * 1024) // Read one request: UTF-8 bytes until '\n' (recommended) or EOF. Strips optional '\r'. static string recv_request_line(SOCKET s) { string buf; buf.reserve(4096); char chunk[4096]; while (buf.size() < MAX_REQUEST_BYTES) { int n = recv(s, chunk, static_cast(sizeof(chunk)), 0); if (n <= 0) break; buf.append(chunk, static_cast(n)); if (buf.find('\n') != string::npos) break; // Legacy clients: no '\n' but full message fits in one recv (typical short lines) if (n < static_cast(sizeof(chunk))) break; } size_t nl = buf.find('\n'); if (nl != string::npos) buf.resize(nl); if (!buf.empty() && buf.back() == '\r') buf.pop_back(); return buf; } // Browsers may still hit arbitrary ports with HTTP; do not send that to NMT. static bool looks_like_http_request_line(const string &s) { auto starts_ci = [&s](const char *word) { size_t len = strlen(word); if (s.size() < len) return false; for (size_t i = 0; i < len; ++i) { unsigned char c = static_cast(s[i]); unsigned char w = static_cast(word[i]); if (std::tolower(c) != std::tolower(w)) return false; } return true; }; return starts_ci("GET ") || starts_ci("POST ") || starts_ci("PUT ") || starts_ci("HEAD ") || starts_ci("DELETE ") || starts_ci("OPTIONS ") || starts_ci("PATCH ") || starts_ci("CONNECT ") || starts_ci("TRACE "); } static void send_http_not_nmt_message(SOCKET s) { const char *body = "This TCP port is the English-to-Italian NMT server (raw UTF-8 lines), " "not an HTTP server. Point your HTTP client or Unity WebRequest at a " "different port, or use a dedicated NMT client.\r\n"; const int body_len = static_cast(strlen(body)); char header[256]; int hdr_len = snprintf(header, sizeof(header), "HTTP/1.1 400 Bad Request\r\n" "Content-Type: text/plain; charset=utf-8\r\n" "Connection: close\r\n" "Content-Length: %d\r\n" "\r\n", body_len); if (hdr_len > 0 && hdr_len < static_cast(sizeof(header))) { send(s, header, hdr_len, 0); send(s, body, body_len, 0); } } int main() { // OpenBLAS + CTranslate2 + OpenMP on Windows can trigger allocator corruption // ("Bad memory unallocation") unless BLAS uses a single thread. _putenv_s("OPENBLAS_NUM_THREADS", "1"); _putenv_s("GOTO_NUM_THREADS", "1"); // Let CTranslate2 use several OpenMP threads for compute (like evaluate_nmt_fast.py). // If you see BLAS errors again, comment this out or set to "1". _putenv_s("OMP_NUM_THREADS", "8"); // Force UTF-8 for Italian characters in console SetConsoleOutputCP(CP_UTF8); // 1. Initialize Winsock WSADATA wsaData; int iResult = WSAStartup(MAKEWORD(2, 2), &wsaData); if (iResult != 0) { cerr << "WSAStartup failed with error: " << iResult << endl; return 1; } // 2. Setup Address Info struct addrinfo *result = NULL; struct addrinfo hints; ZeroMemory(&hints, sizeof(hints)); hints.ai_family = AF_INET; hints.ai_socktype = SOCK_STREAM; hints.ai_protocol = IPPROTO_TCP; hints.ai_flags = AI_PASSIVE; iResult = getaddrinfo(NULL, DEFAULT_PORT, &hints, &result); if (iResult != 0) { cerr << "getaddrinfo failed with error: " << iResult << endl; WSACleanup(); return 1; } // 3. Create Listening Socket SOCKET ListenSocket = socket(result->ai_family, result->ai_socktype, result->ai_protocol); if (ListenSocket == INVALID_SOCKET) { cerr << "socket failed with error: " << WSAGetLastError() << endl; freeaddrinfo(result); WSACleanup(); return 1; } // 4. Bind Socket iResult = bind(ListenSocket, result->ai_addr, (int)result->ai_addrlen); if (iResult == SOCKET_ERROR) { cerr << "bind failed with error: " << WSAGetLastError() << endl; freeaddrinfo(result); closesocket(ListenSocket); WSACleanup(); return 1; } freeaddrinfo(result); // 5. Listen iResult = listen(ListenSocket, SOMAXCONN); if (iResult == SOCKET_ERROR) { cerr << "listen failed with error: " << WSAGetLastError() << endl; closesocket(ListenSocket); WSACleanup(); return 1; } // 6. Load NMT in parallel with accepting connections (weights must still be read from // disk once; this removes the old "nothing listens until load finishes" stall). mutex engine_mutex; condition_variable engine_cv; unique_ptr engine; bool engine_failed = false; exception_ptr load_exception; thread([&]() { try { auto eng = make_unique("artifacts/ct2/en_it_v4_casual_weighted/model", "artifacts/ct2/en_it_v4_casual_weighted/model/sentencepiece.bpe.model"); // One warmup decode so the first real client pays less JIT/cache cost. (void)eng->translate("Hi"); lock_guard lock(engine_mutex); engine = std::move(eng); } catch (...) { lock_guard lock(engine_mutex); load_exception = current_exception(); engine_failed = true; } engine_cv.notify_all(); }).detach(); cout << "✅ Listening on port " << DEFAULT_PORT << " — model loading in background (connect anytime; translate waits until " "ready).\n"; // 7. Server Loop while (true) { SOCKET ClientSocket = accept(ListenSocket, NULL, NULL); if (ClientSocket == INVALID_SOCKET) { cerr << "accept failed with error: " << WSAGetLastError() << endl; continue; } // Low latency for small request/response (same idea as disabling Nagle in RPC clients) BOOL nodelay = TRUE; setsockopt(ClientSocket, IPPROTO_TCP, TCP_NODELAY, reinterpret_cast(&nodelay), sizeof(nodelay)); // Receive English text (send UTF-8 ending with \n for framing; avoids 1024-byte truncation) string input = recv_request_line(ClientSocket); if (!input.empty()) { cout << "📥 Received: " << input << endl; if (looks_like_http_request_line(input)) { cout << "⚠️ Ignoring HTTP request (wrong service on this port).\n"; send_http_not_nmt_message(ClientSocket); closesocket(ClientSocket); continue; } NMT::NMTWrapper *engine_ptr = nullptr; { unique_lock lk(engine_mutex); engine_cv.wait(lk, [&] { return engine != nullptr || engine_failed; }); if (engine_failed) { cerr << "❌ NMT failed to load.\n"; try { if (load_exception) rethrow_exception(load_exception); } catch (const exception &e) { cerr << e.what() << endl; } const char *err = "Error: NMT engine not loaded.\n"; send(ClientSocket, err, static_cast(strlen(err)), 0); closesocket(ClientSocket); continue; } engine_ptr = engine.get(); } string output = engine_ptr->translate(input); cout << "📤 Sending: " << output << endl; // Send back Italian response int iSendResult = send(ClientSocket, output.c_str(), (int)output.length(), 0); if (iSendResult == SOCKET_ERROR) { cerr << "send failed with error: " << WSAGetLastError() << endl; } } else { cout << "Connection closing or empty request..." << endl; } // Cleanup client socket closesocket(ClientSocket); } // Cleanup (though we have an infinite loop) closesocket(ListenSocket); WSACleanup(); return 0; }