import torch from torch.utils.data import Dataset, DataLoader from transformers import BertTokenizer, BertForTokenClassification, AdamW from sklearn.model_selection import train_test_split import gradio as gr import random from faker import Faker import html import json import numpy as np from tqdm import tqdm import os # Constants MAX_LENGTH = 512 BATCH_SIZE = 16 EPOCHS = 5 LEARNING_RATE = 2e-5 fake = Faker() def generate_employee(): name = fake.name() job = fake.job() ext = f"ext. {random.randint(1000, 9999)}" email = f"{name.lower().replace(' ', '.')}@example.com" return name, job, ext, email def generate_html_content(num_employees=3): employees = [generate_employee() for _ in range(num_employees)] html_content = f"""
{html.escape(name)}
{html.escape(job)}
{html.escape(ext)}
Send Email