import torch
import torch.nn.functional as F
from torch.optim import AdamW  # transformers.AdamW is deprecated/removed in recent releases
from torch.utils.data import Dataset, DataLoader
from transformers import MT5ForConditionalGeneration, MT5Tokenizer, AutoModel, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from huggingface_hub import create_repo, upload_folder

# ========== CONFIG ==========
HF_USERNAME = "aarath97"
HF_REPO = "mt5-dogri-translation"
MODEL_NAME = "google/mt5-large"
BATCH_SIZE = 2
LR = 1e-5
DPO_STEPS = 100
HGRL_STEPS = 100
COMBINED_STEPS = 50
GAMMA = 3.5      # sharpness of the hyper-gamma reward
ALPHA = 0.5      # weight of the DPO term in the combined loss
BETA = 0.5       # weight of the HGRL term in the combined loss
DPO_BETA = 1.0   # DPO temperature

# ========== LOAD DATA ==========
# Each row provides a Dogri source, a preferred (reference) English translation,
# and an unpreferred translation. "Unpreffered" matches the column header in the spreadsheet.
df = pd.read_excel("dogri_train.xlsx")
train_data = list(zip(df['Dogri'], df['English'], df['Unpreffered']))

# ========== TOKENIZERS AND SIMILARITY MODEL ==========
tokenizer = MT5Tokenizer.from_pretrained(MODEL_NAME)
sbert = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
sbert_tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
sbert.eval()

# ========== UTILITIES ==========
def compute_similarity(sent1, sent2):
    """Cosine similarity between mean-pooled MiniLM embeddings of two sentences."""
    with torch.no_grad():
        emb1 = sbert(**sbert_tokenizer(sent1, return_tensors='pt', truncation=True)).last_hidden_state.mean(1)
        emb2 = sbert(**sbert_tokenizer(sent2, return_tensors='pt', truncation=True)).last_hidden_state.mean(1)
    return cosine_similarity(emb1.numpy(), emb2.numpy())[0][0]

def hyper_gamma_reward(rho):
    """Hyper-gamma reward: rescales the similarity rho to emphasise near-perfect matches."""
    return rho * np.exp(-GAMMA * (1 - rho))

# ========== DATASET ==========
class DogriDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

dataloader = DataLoader(DogriDataset(train_data), batch_size=BATCH_SIZE, shuffle=True)

# ========== TRAINING ==========
model = MT5ForConditionalGeneration.from_pretrained(MODEL_NAME).to("cuda")
optimizer = AdamW(model.parameters(), lr=LR)
dpo_losses, hgrl_losses, final_losses = [], [], []

# ---- DPO Training ----
# Push the model to assign higher likelihood to the preferred translation than to
# the unpreferred one: loss = -log(sigmoid(beta * (log p(ref) - log p(unpref)))).
for step in range(DPO_STEPS):
    batch = next(iter(dataloader))  # draw a fresh shuffled batch each step
    loss_batch = []
    for src, ref, unpref in zip(*batch):
        input_ids = tokenizer(src, return_tensors='pt', truncation=True).input_ids.to("cuda")
        ref_ids = tokenizer(ref, return_tensors='pt', truncation=True).input_ids.to("cuda")
        unpref_ids = tokenizer(unpref, return_tensors='pt', truncation=True).input_ids.to("cuda")
        ref_nll = model(input_ids=input_ids, labels=ref_ids).loss        # mean token NLL of the reference
        unpref_nll = model(input_ids=input_ids, labels=unpref_ids).loss  # mean token NLL of the unpreferred output
        # Keep the margin as a tensor (no .item()) so gradients flow through both forward passes.
        logit_diff = unpref_nll - ref_nll  # per-token log-likelihood margin of preferred over unpreferred
        loss = -F.logsigmoid(DPO_BETA * logit_diff)
        loss_batch.append(loss)
    loss_val = torch.stack(loss_batch).mean()
    loss_val.backward()
    optimizer.step()
    optimizer.zero_grad()
    dpo_losses.append(loss_val.item())

# ---- HGRL Training ----
# Sample a translation, score it against the reference with the hyper-gamma reward,
# and maximise the reward-weighted likelihood of the sampled output.
for step in range(HGRL_STEPS):
    batch = next(iter(dataloader))
    loss_batch = []
    for src, ref, _ in zip(*batch):
        input_ids = tokenizer(src, return_tensors='pt', truncation=True).input_ids.to("cuda")
        gen_ids = model.generate(input_ids)
        gen_text = tokenizer.decode(gen_ids[0], skip_special_tokens=True)
        rho = compute_similarity(gen_text, ref)
        reward = hyper_gamma_reward(rho)
        labels = tokenizer(gen_text, return_tensors='pt').input_ids.to("cuda")
        log_prob = -model(input_ids=input_ids, labels=labels).loss  # mean token log-likelihood of the sample
        loss = -reward * log_prob  # REINFORCE-style surrogate: raise likelihood of high-reward samples
        loss_batch.append(loss)
    loss_val = torch.stack(loss_batch).mean()
    loss_val.backward()
    optimizer.step()
    optimizer.zero_grad()
    hgrl_losses.append(loss_val.item())

# ---- Combined Training ----
# Joint objective on the same batch: ALPHA * DPO loss + BETA * HGRL loss.
for step in range(COMBINED_STEPS):
    batch = next(iter(dataloader))
    loss_dpo_batch, loss_hgrl_batch = [], []
    for src, ref, unpref in zip(*batch):
        input_ids = tokenizer(src, return_tensors='pt', truncation=True).input_ids.to("cuda")
        ref_ids = tokenizer(ref, return_tensors='pt', truncation=True).input_ids.to("cuda")
        unpref_ids = tokenizer(unpref, return_tensors='pt', truncation=True).input_ids.to("cuda")

        # DPO term
        ref_nll = model(input_ids=input_ids, labels=ref_ids).loss
        unpref_nll = model(input_ids=input_ids, labels=unpref_ids).loss
        dpo_loss = -F.logsigmoid(DPO_BETA * (unpref_nll - ref_nll))
        loss_dpo_batch.append(dpo_loss)

        # HGRL term
        gen_ids = model.generate(input_ids)
        gen_text = tokenizer.decode(gen_ids[0], skip_special_tokens=True)
        rho = compute_similarity(gen_text, ref)
        reward = hyper_gamma_reward(rho)
        labels = tokenizer(gen_text, return_tensors='pt').input_ids.to("cuda")
        log_prob = -model(input_ids=input_ids, labels=labels).loss
        hgrl_loss = -reward * log_prob
        loss_hgrl_batch.append(hgrl_loss)

    loss_dpo_mean = torch.stack(loss_dpo_batch).mean()
    loss_hgrl_mean = torch.stack(loss_hgrl_batch).mean()
    combined_loss = ALPHA * loss_dpo_mean + BETA * loss_hgrl_mean
    combined_loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    final_losses.append(combined_loss.item())

# ========== SAVE OUTPUTS ==========
plt.plot(dpo_losses, label="DPO")
plt.plot(hgrl_losses, label="HGRL")
plt.plot(final_losses, label="Combined")
plt.xlabel("Steps")
plt.ylabel("Loss")
plt.legend()
plt.savefig("loss_curve.png")

with open("loss_report.txt", "w") as f:
    f.write("DPO Final Loss: {:.4f}\n".format(dpo_losses[-1]))
    f.write("HGRL Final Loss: {:.4f}\n".format(hgrl_losses[-1]))
    f.write("Combined Final Loss: {:.4f}\n".format(final_losses[-1]))

# ========== TEST AND SAVE TRANSLATIONS ==========
test_df = pd.read_excel("in22conv.xlsx")
test_outputs = []
for line in test_df.iloc[:, 0].tolist():
    input_ids = tokenizer(line, return_tensors='pt', truncation=True).input_ids.to("cuda")
    outputs = model.generate(input_ids)
    translation = tokenizer.decode(outputs[0], skip_special_tokens=True)
    test_outputs.append(translation)

output_df = pd.DataFrame({"Dogri": test_df.iloc[:, 0], "English": test_outputs})
output_df.to_excel("translated_output.xlsx", index=False)

# ========== PUSH TO HUGGING FACE ==========
model.save_pretrained("mt5-dogri")
tokenizer.save_pretrained("mt5-dogri")
create_repo(f"{HF_USERNAME}/{HF_REPO}", private=False, exist_ok=True)
upload_folder(repo_id=f"{HF_USERNAME}/{HF_REPO}", folder_path="mt5-dogri")
print("Model uploaded successfully!")
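
# ========== OPTIONAL: SANITY-CHECK THE UPLOADED CHECKPOINT ==========
# A minimal sketch, assuming the upload above succeeded and the repo is readable
# with the current credentials. It reloads the checkpoint from the Hub (rather
# than the local "mt5-dogri" folder) and translates the first test sentence as
# a quick end-to-end check; remove this block if the extra download is unwanted.
hub_repo = f"{HF_USERNAME}/{HF_REPO}"
hub_model = MT5ForConditionalGeneration.from_pretrained(hub_repo).to("cuda")
hub_tokenizer = MT5Tokenizer.from_pretrained(hub_repo)

sample = test_df.iloc[0, 0]  # first Dogri sentence from the test spreadsheet
sample_ids = hub_tokenizer(sample, return_tensors="pt", truncation=True).input_ids.to("cuda")
sample_out = hub_model.generate(sample_ids, max_new_tokens=64)
print("Hub checkpoint translation:", hub_tokenizer.decode(sample_out[0], skip_special_tokens=True))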