# Provenance: Hugging Face upload by openagi-agi ("Upload 8 files", commit 7cd7caf, verified).
# These header lines came from the hosting page and are commented out so the file is valid Python.
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
# ===== Load precomputed embeddings =====
# NOTE(review): torch.load unpickles the file — only load trusted checkpoints.
emb_data = torch.load("chat_embeddings.pt")
x_embeddings = emb_data["source"]  # expected [N, D] — TODO confirm against the embedding script
y_embeddings = emb_data["target"]  # expected [N, D], paired row-for-row with source
print("Source shape:", x_embeddings.shape)
print("Target shape:", y_embeddings.shape)

# Dataset dimensions used throughout the script.
num_samples = x_embeddings.shape[0]
embedding_dim = x_embeddings.shape[1]

# ===== Device =====
# Prefer GPU when available; both tensors live on the same device as the model.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
x_embeddings = x_embeddings.to(device)
y_embeddings = y_embeddings.to(device)
# ===== Define model =====
class SemanticMapper(nn.Module):
    """Two-layer MLP that maps one embedding space onto another.

    The input dimension is expanded to twice its size, passed through a
    ReLU non-linearity, and projected back to the original dimension, so
    input and output shapes match: [batch, dim] -> [batch, dim].
    """

    def __init__(self, dim):
        super().__init__()
        hidden = dim * 2  # expansion factor of 2 between the two linear layers
        self.net = nn.Sequential(
            nn.Linear(dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, dim),
        )

    def forward(self, x):
        """Apply the mapping network to a batch of embeddings."""
        return self.net(x)
# ===== Model, optimizer, loss =====
model = SemanticMapper(embedding_dim).to(device)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
# Cosine embedding loss pulls each predicted vector toward its target's direction.
criterion = nn.CosineEmbeddingLoss()

# ===== Training config =====
epochs = 20
batch_size = 64
loss_history = []  # one average-loss entry per epoch, consumed by the plot below
# ===== Training loop =====
# ===== Training loop =====
model.train()  # explicit train mode (no-op here, but correct habit)
for epoch in range(epochs):
    # Fresh random sample order every epoch.
    perm = torch.randperm(num_samples, device=device)
    epoch_loss = 0.0
    for i in range(0, num_samples, batch_size):
        idx = perm[i:i + batch_size]
        x_batch = x_embeddings[idx]
        y_batch = y_embeddings[idx]
        # CosineEmbeddingLoss target of +1 means "make this pair similar".
        target = torch.ones(x_batch.size(0), device=device)
        y_pred = model(x_batch)
        loss = criterion(y_pred, y_batch, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # loss is a per-batch mean; scale back up so epoch_loss is a true sum
        # over samples. The old code divided the sum of batch means by
        # num_samples / batch_size, which is only the batch count when
        # num_samples divides evenly — a partial last batch skewed the average.
        epoch_loss += loss.item() * x_batch.size(0)
    avg_loss = epoch_loss / num_samples  # exact per-sample mean loss
    loss_history.append(avg_loss)
    print(f"Epoch {epoch + 1}/{epochs} - Loss: {avg_loss:.6f}")
# ===== Plot loss curve =====
ax = plt.gca()
ax.plot(loss_history, marker="o")
ax.set_title("Training Loss (Cosine Similarity)")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.grid(True)
plt.show()

# ===== Persist the trained weights =====
# state_dict only (not the full module), the recommended checkpoint format.
torch.save(model.state_dict(), "semantic_mapper.pth")