File size: 772 Bytes
3ccf31a
 
cc63aca
3ccf31a
 
 
 
 
 
 
 
cc63aca
 
3ccf31a
 
 
 
 
 
cc63aca
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA

def visualize_pca(df_filtered):
    """Plot a 2-D PCA projection of TF-IDF review text, colored by rating.

    The ``"cleaned_text"`` column is vectorized with TF-IDF (at most 1000
    features), reduced to two principal components, and drawn as a scatter
    plot whose point colors come from the ``"reviews.rating"`` column.

    Args:
        df_filtered: Table with ``"cleaned_text"`` and ``"reviews.rating"``
            columns. NOTE(review): assumed to be a pandas DataFrame (the
            text column is accessed through Series methods) -- confirm
            against callers.

    Returns:
        The matplotlib figure containing the scatter plot and its colorbar.

    Raises:
        ValueError: If fewer than two rows are supplied, since a
            two-component PCA cannot be fitted. scikit-learn may also raise
            ``ValueError`` if the texts yield an empty vocabulary.
    """
    # TfidfVectorizer rejects NaN/None documents and non-string values.
    # Replace missing entries with "" (instead of dropping rows) so the
    # projected points stay aligned one-to-one with the rating column.
    texts = df_filtered["cleaned_text"].fillna("").astype(str)

    if len(texts) < 2:
        raise ValueError(
            "visualize_pca needs at least 2 rows to compute a "
            f"2-component PCA; got {len(texts)}."
        )

    vectorizer = TfidfVectorizer(max_features=1000)
    X = vectorizer.fit_transform(texts)

    # Fixed seed keeps the projection reproducible between calls.
    pca = PCA(n_components=2, random_state=42)
    # The TF-IDF matrix is sparse; it is densified before being handed to PCA.
    X_pca = pca.fit_transform(X.toarray())

    fig, ax = plt.subplots(figsize=(10, 7))
    scatter = ax.scatter(
        X_pca[:, 0],
        X_pca[:, 1],
        c=df_filtered["reviews.rating"],
        cmap="viridis",
        alpha=0.6,
    )
    cbar = fig.colorbar(scatter, ax=ax)
    cbar.set_label("Review Rating")
    ax.set_title("PCA visualization of Amazon Reviews")
    ax.set_xlabel("PCA 1")
    ax.set_ylabel("PCA 2")
    return fig