Update app.py
Browse files
app.py
CHANGED
|
@@ -53,7 +53,7 @@ input_text = st.text_input("Enter Text:", "Hello, how are you?")
|
|
| 53 |
tokens = tokenizer.tokenize(input_text)
|
| 54 |
st.write("Tokenized Output:", tokens)
|
| 55 |
|
| 56 |
-
# Token Embeddings Visualization (PCA Projection)
|
| 57 |
st.subheader("🧩 Token Embeddings Visualization")
|
| 58 |
with torch.no_grad():
|
| 59 |
inputs = tokenizer(input_text, return_tensors="pt")
|
|
@@ -61,6 +61,12 @@ with torch.no_grad():
|
|
| 61 |
|
| 62 |
if hasattr(outputs, "last_hidden_state"):
|
| 63 |
embeddings = outputs.last_hidden_state.squeeze(0).numpy()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
pca = PCA(n_components=2)
|
| 65 |
reduced_embeddings = pca.fit_transform(embeddings)
|
| 66 |
|
|
|
|
| 53 |
tokens = tokenizer.tokenize(input_text)
|
| 54 |
st.write("Tokenized Output:", tokens)
|
| 55 |
|
| 56 |
+
# Token Embeddings Visualization (Fixed PCA Projection)
|
| 57 |
st.subheader("🧩 Token Embeddings Visualization")
|
| 58 |
with torch.no_grad():
|
| 59 |
inputs = tokenizer(input_text, return_tensors="pt")
|
|
|
|
| 61 |
|
| 62 |
if hasattr(outputs, "last_hidden_state"):
|
| 63 |
embeddings = outputs.last_hidden_state.squeeze(0).numpy()
|
| 64 |
+
|
| 65 |
+
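# Ensure the number of tokens and embeddings match (NOTE(review): this only equalises the counts; tokenizer(...) may add special tokens such as [CLS]/[SEP] that tokenize() does not return, so embeddings[i] may not correspond to tokens[i] — TODO confirm alignment)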
|
| 66 |
+
n_tokens = min(len(tokens), embeddings.shape[0])
|
| 67 |
+
embeddings = embeddings[:n_tokens] # Trim embeddings to match token count
|
| 68 |
+
tokens = tokens[:n_tokens] # Trim tokens to match embeddings count
|
| 69 |
+
|
| 70 |
pca = PCA(n_components=2)
|
| 71 |
reduced_embeddings = pca.fit_transform(embeddings)
|
| 72 |
|