Spaces:
Sleeping
Sleeping
| """ | |
| Send many English lines from a text file to the NMT TCP server (one request per line), | |
| measure round-trip time for each translation, and write a report. | |
| Usage (with server on default port 18080): | |
| python scripts/benchmark_tcp_batch.py | |
| python scripts/benchmark_tcp_batch.py --file path/to/sentences.txt | |
| python scripts/benchmark_tcp_batch.py --host 127.0.0.1 --port 18080 --out results.tsv | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import socket | |
| import sys | |
| import time | |
| from pathlib import Path | |
def recv_until_close(sock: socket.socket, max_bytes: int = 4 * 1024 * 1024) -> bytes:
    """Read from *sock* until EOF or until *max_bytes* bytes have been received.

    The server is expected to send one reply and then close the connection,
    so EOF (an empty ``recv``) marks the end of the reply.

    Args:
        sock: Connected socket (anything with a compatible ``recv`` method).
        max_bytes: Hard upper bound on the number of bytes returned. A reply
            longer than this is truncated to exactly ``max_bytes`` bytes.

    Returns:
        The received bytes, at most ``max_bytes`` long.
    """
    chunks: list[bytes] = []
    remaining = max_bytes
    while remaining > 0:
        # Never request more than the remaining budget; otherwise the final
        # read could push the result past max_bytes by up to one full chunk.
        chunk = sock.recv(min(65536, remaining))
        if not chunk:
            break
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)
def translate_one(text: str, host: str, port: int, timeout: float) -> tuple[str, float]:
    """Send one line of text to the server and time the full round trip.

    Opens a fresh TCP connection, sends *text* as UTF-8 followed by a newline,
    and reads the reply until the peer closes the connection.

    Args:
        text: Sentence to send (without trailing newline).
        host: Server host name or address (IPv4 or IPv6).
        port: Server TCP port.
        timeout: Socket timeout in seconds, applied to connect, send and recv.

    Returns:
        ``(reply, elapsed_ms)`` where *reply* is the decoded response (invalid
        UTF-8 bytes are replaced with U+FFFD) and *elapsed_ms* covers connect,
        send and receive.

    Raises:
        OSError: On connection failure or timeout.
    """
    t0 = time.perf_counter()
    # create_connection resolves the host and tries every returned address
    # (IPv4 and IPv6), whereas a hard-coded AF_INET socket is IPv4-only.
    # The timeout stays set on the returned socket for sendall/recv.
    with socket.create_connection((host, port), timeout=timeout) as s:
        s.sendall(text.encode("utf-8") + b"\n")
        raw = recv_until_close(s)
        elapsed_ms = (time.perf_counter() - t0) * 1000.0
    # errors="replace" yields the same string as a strict decode for valid
    # UTF-8, so a separate strict attempt is unnecessary.
    return raw.decode("utf-8", errors="replace"), elapsed_ms
def load_lines(path: Path) -> list[str]:
    """Return the non-empty, non-comment lines of a UTF-8 text file.

    Each line is stripped of surrounding whitespace; blank lines and lines
    whose first non-blank character is ``#`` are skipped.

    Args:
        path: File to read. A leading UTF-8 byte-order mark is tolerated.

    Returns:
        The remaining lines, in file order.
    """
    # "utf-8-sig" drops a leading BOM if present. With plain "utf-8" the BOM
    # would survive strip() and either hide a first-line "#" comment or be
    # sent to the server as part of the first sentence.
    text = path.read_text(encoding="utf-8-sig")
    stripped = (line.strip() for line in text.splitlines())
    return [s for s in stripped if s and not s.startswith("#")]
def _build_parser(default_file: Path) -> argparse.ArgumentParser:
    """Build the command-line parser; *default_file* is the default for --file."""
    p = argparse.ArgumentParser(description="Benchmark NMT TCP server sentence-by-sentence.")
    p.add_argument(
        "--file",
        "-f",
        type=Path,
        default=default_file,
        help="UTF-8 text file: one English sentence per line (# = comment)",
    )
    p.add_argument("--host", default="127.0.0.1", help="NMT server host")
    p.add_argument("--port", type=int, default=18080, help="NMT server port")
    p.add_argument(
        "--out",
        "-o",
        type=Path,
        default=None,
        help="Write TSV report (default: scripts/benchmark_tcp_results.tsv)",
    )
    p.add_argument(
        "--timeout",
        type=float,
        default=120.0,
        help="Per-request socket timeout in seconds",
    )
    return p


def _tsv_field(value: str) -> str:
    """Flatten *value* onto one line so it cannot break the TSV row/column layout."""
    return value.replace("\t", " ").replace("\r", " ").replace("\n", " ")


def _write_report(out_path: Path, rows: list[tuple[int, str, str, float]]) -> None:
    """Write *rows* as ``index, ms, english, italian`` TSV to *out_path*."""
    with out_path.open("w", encoding="utf-8") as f:
        f.write("index\tms\tenglish\titalian\n")
        # Fields are not quoted; tabs and line breaks inside them are
        # replaced by spaces instead. Failed requests carry ms == -1.
        f.writelines(
            f"{idx}\t{ms:.3f}\t{_tsv_field(en)}\t{_tsv_field(it)}\n"
            for idx, en, it, ms in rows
        )


def main() -> None:
    """Translate every sentence in the input file and report per-request timings.

    Parses the command line, sends each sentence to the server in its own
    request, prints the result and timing for each, prints min/max/avg over
    the successful requests, and writes a TSV report.

    Exits with status 1 if the input file is missing or contains no
    sentences. A failed request is recorded in the report (``ERROR: ...``
    with ms ``-1``) and does not stop the run.
    """
    # Two levels up from this file: the directory that contains "scripts/".
    root = Path(__file__).resolve().parents[1]
    default_file = root / "scripts" / "sample_sentences_en.txt"
    args = _build_parser(default_file).parse_args()

    if not args.file.is_file():
        print(f"File not found: {args.file}", file=sys.stderr)
        sys.exit(1)
    sentences = load_lines(args.file)
    if not sentences:
        print("No sentences to translate (empty file or only comments).", file=sys.stderr)
        sys.exit(1)

    out_path = args.out
    if out_path is None:
        out_path = root / "scripts" / "benchmark_tcp_results.tsv"
    # Create the report directory up front so a bad --out path fails before
    # the (potentially long) benchmark instead of discarding its results.
    out_path.parent.mkdir(parents=True, exist_ok=True)

    rows: list[tuple[int, str, str, float]] = []
    print(f"Loaded {len(sentences)} lines from {args.file}")
    print(f"Connecting to {args.host}:{args.port} …\n")
    for i, line in enumerate(sentences, start=1):
        try:
            italian, ms = translate_one(line, args.host, args.port, args.timeout)
        except Exception as e:
            # Deliberately best-effort: record the failure and keep going so
            # one bad request does not abort the whole benchmark.
            print(f"[{i}] ERROR: {e}")
            rows.append((i, line, f"ERROR: {e}", -1.0))
            continue
        # The reply may end with a line terminator; drop surrounding
        # whitespace so it does not leak into the console output or the TSV.
        italian = italian.strip()
        rows.append((i, line, italian, ms))
        print(f"[{i}] {ms:8.1f} ms | EN: {line[:80]}{'…' if len(line) > 80 else ''}")
        print(f" IT: {italian[:120]}{'…' if len(italian) > 120 else ''}\n")

    # Failed requests are stored with a negative time; exclude them from stats.
    times = [r[3] for r in rows if r[3] >= 0]
    if times:
        print(
            f"---\nDone. Per-sentence times: min {min(times):.1f} ms, "
            f"max {max(times):.1f} ms, avg {sum(times) / len(times):.1f} ms"
        )

    _write_report(out_path, rows)
    print(f"Wrote {out_path}")
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()