// Repository page header captured along with this file (not part of the test code):
// flowos — iter 14: hybrid regex + fine-tuned EXAONE 4.0 1.2B (F1=0.930)
// commit 1bcb098 (verified)
import { describe, it, expect } from "vitest";
import { detectEmail } from "./email.js";
/**
 * Test suite for `detectEmail`.
 *
 * The assertions below rely on `detectEmail(text)` returning an array-like
 * list of matches, each exposing `value`, `type`, `start` and `end`.
 *
 * The Korean fixtures are written as real UTF-8 Hangul. They were previously
 * stored as mis-decoded bytes (mojibake), which meant the "Korean" cases were
 * not exercising Korean text at all.
 */
describe("Email detection", () => {
  // --- Positive cases: a well-formed address must be found ---

  it("detects simple email", () => {
    const r = detectEmail("contact: foo@example.com");
    expect(r).toHaveLength(1);
    expect(r[0].value).toBe("foo@example.com");
    expect(r[0].type).toBe("EMAIL");
  });

  it("detects email with dots and dashes in local part", () => {
    expect(detectEmail("first.last-name@company.co.kr")).toHaveLength(1);
  });

  it("detects email with plus addressing", () => {
    const r = detectEmail("user+tag@gmail.com");
    expect(r).toHaveLength(1);
    expect(r[0].value).toBe("user+tag@gmail.com");
  });

  it("detects multi-label TLD (.co.kr)", () => {
    const r = detectEmail("hong@samsung.co.kr");
    expect(r).toHaveLength(1);
    expect(r[0].value).toBe("hong@samsung.co.kr");
  });

  it("detects multiple emails in one string", () => {
    // "그리고" ("and") separates the two addresses with non-ASCII text.
    const r = detectEmail("a@x.com 그리고 b@y.org");
    expect(r).toHaveLength(2);
  });

  it("detects email inside Korean sentence", () => {
    // "My email is kim@klawn.io." — the match must not absorb adjacent Hangul.
    const r = detectEmail("제 이메일은 kim@klawn.io 입니다");
    expect(r).toHaveLength(1);
    expect(r[0].value).toBe("kim@klawn.io");
  });

  // --- Negative cases: nothing that merely resembles an address ---

  it("returns empty for plain text", () => {
    // "No email" — Korean text without any '@'.
    expect(detectEmail("이메일 없음")).toHaveLength(0);
  });

  it("returns empty for bare @ without domain", () => {
    expect(detectEmail("hello @ world")).toHaveLength(0);
  });

  it("returns empty for domain without TLD", () => {
    expect(detectEmail("user@localhost")).toHaveLength(0);
  });

  // --- Offsets ---

  it("reports correct start/end positions", () => {
    const text = "before foo@bar.com after";
    const r = detectEmail(text);
    expect(r).toHaveLength(1);
    // `start`/`end` must be usable directly with String.prototype.slice.
    expect(text.slice(r[0].start, r[0].end)).toBe("foo@bar.com");
  });
});