import os

# Point every Hugging Face / XDG cache location at /app/hf_cache and HOME at
# /app.
#
# NOTE: this must run BEFORE `sentence_transformers` is imported. That import
# pulls in `transformers` and `huggingface_hub`, which resolve their default
# cache directories from these variables at import time; assigning them after
# the import leaves the already-computed paths untouched.
os.environ["HF_HOME"] = "/app/hf_cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/hf_cache"
os.environ["XDG_CACHE_HOME"] = "/app/hf_cache"
os.environ["HOME"] = "/app"

# Imports deliberately follow the environment setup above (hence E402).
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
import plotly.express as px  # noqa: E402
import streamlit as st  # noqa: E402
from sentence_transformers import SentenceTransformer  # noqa: E402
from sklearn.decomposition import PCA  # noqa: E402
from sklearn.metrics.pairwise import cosine_similarity  # noqa: E402

# ---------------------------------------------------------------------------
# Page header and input widgets
# ---------------------------------------------------------------------------

# Short blurb rendered directly under the page title.
PURPOSE_MARKDOWN = """
**Purpose:**
Elevate the most semantically relevant queries from keyword research.
"""

# Body of the collapsible "Instructions" panel.
INSTRUCTIONS_MARKDOWN = """
**How to use this tool:**
1. Enter your **Primary Keyword**.
2. Provide a list of **Keywords to Compare**.
3. Click **Calculate Similarities** to compute and rank your keywords by relevance.

**Output:**
- Sorted table of keywords by cosine similarity.
- Interactive 3D PCA plot of keyword embeddings.
- Option to download results.
"""

st.title("Keyword Cosine Similarity Tool")
st.markdown(PURPOSE_MARKDOWN)

st.header("Input Parameters")

# The single keyword every other keyword is compared against.
primary_keyword = st.text_input(
    "Primary Keyword",
    placeholder="Enter your primary keyword",
)

# Raw, still-unparsed text of the comparison keywords. The widget keeps its
# "keywords" session-state key; its return value is the same string that
# st.session_state["keywords"] holds.
keywords = st.text_area(
    "Keywords to Compare",
    placeholder="Enter keywords separated by new lines or commas",
    help="You can input keywords on separate lines or separated by commas. Any keywords with commas will be treated as separate queries.",
    key="keywords",
)

with st.expander("ℹ️ Instructions (click for details)"):
    st.markdown(INSTRUCTIONS_MARKDOWN)

# ---------------------------------------------------------------------------
# Similarity calculation and results
# ---------------------------------------------------------------------------

MODEL_NAME = "mixedbread-ai/mxbai-embed-large-v1"
# Only the leading dimensions of each embedding are used for the comparison
# (presumably Matryoshka-style truncation, given the `mrl_` naming — confirm).
EMBEDDING_DIMENSIONS = 256
# The scatter plot is 3D, so PCA is asked for three components.
PCA_COMPONENTS = 3


@st.cache_resource(show_spinner=False)
def _load_model(name: str) -> SentenceTransformer:
    """Return the SentenceTransformer for *name*, cached across script reruns.

    Without the cache the model would be re-instantiated on every button
    click, because Streamlit re-executes the whole script on each
    interaction. Exceptions raised while loading propagate to the caller.
    """
    return SentenceTransformer(name)


def _parse_keywords(raw: str) -> list:
    """Split *raw* on commas and newlines; return the stripped, non-empty parts."""
    return [kw.strip() for kw in raw.replace(",", "\n").split("\n") if kw.strip()]


if st.button("Calculate Similarities"):
    primary_text = (primary_keyword or "").strip()
    keyword_list = _parse_keywords(keywords or "")

    # Validate the *parsed* input: text such as "," or "   " is non-empty but
    # yields no keywords, which would make cosine_similarity raise on an
    # empty array further down.
    if not primary_text or not keyword_list:
        st.error("Please provide both the primary keyword and keywords to compare.")
    else:
        try:
            st.info(f"Loading model: {MODEL_NAME}")
            model = _load_model(MODEL_NAME)
        except Exception as e:
            st.error(f"Failed to load model: {e}")
            st.stop()

        try:
            st.info("Generating embeddings...")
            # Row 0 is the primary keyword; rows 1.. follow keyword_list order.
            all_texts = [primary_text] + keyword_list
            embeddings = model.encode(all_texts, normalize_embeddings=True)

            # Keep only the leading dimensions. The truncated vectors are no
            # longer unit length, but cosine_similarity normalises its inputs,
            # so the scores below are unaffected by that.
            mrl_embeddings = embeddings[:, :EMBEDDING_DIMENSIONS]

            primary_embedding = mrl_embeddings[0]
            keyword_embeddings = mrl_embeddings[1:]
        except Exception as e:
            st.error(f"Embedding failed: {e}")
            st.stop()

        st.info("Calculating cosine similarities...")
        similarities = cosine_similarity([primary_embedding], keyword_embeddings)[0]
        results = [
            {"Keyword": kw, "Cosine Similarity": sim}
            for kw, sim in zip(keyword_list, similarities)
        ]
        # Most similar first; sorted() is stable, so ties keep input order.
        sorted_results = sorted(
            results, key=lambda row: row["Cosine Similarity"], reverse=True
        )
        df_results = pd.DataFrame(sorted_results)

        st.header("Results")

        st.download_button(
            label="📥 Download Results as CSV",
            data=df_results.to_csv(index=False),
            file_name="cosine_similarity_results.csv",
            mime="text/csv",
        )

        st.dataframe(df_results)

        st.subheader("3D PCA of Embeddings")
        # PCA cannot produce more components than there are samples; with a
        # single comparison keyword (two texts in total) fit_transform would
        # raise ValueError, so skip the plot instead of crashing.
        if len(all_texts) < PCA_COMPONENTS:
            st.info(
                f"The 3D PCA plot needs at least {PCA_COMPONENTS - 1} keywords to compare."
            )
        else:
            pca = PCA(n_components=PCA_COMPONENTS)
            pca_result = pca.fit_transform(mrl_embeddings)

            pca_df = pd.DataFrame(pca_result, columns=["PC1", "PC2", "PC3"])
            # Labels line up with the embedding rows: primary first, then keywords.
            pca_df["Label"] = ["Primary"] + keyword_list
            fig = px.scatter_3d(
                pca_df, x="PC1", y="PC2", z="PC3", color="Label", text="Label"
            )
            st.plotly_chart(fig, use_container_width=True)

        with st.expander("🔧 Technical Details (click to expand)"):
            st.write("Primary Embedding:", primary_embedding.tolist())
            st.write("Keyword Embeddings:", keyword_embeddings.tolist())

# ---------------------------------------------------------------------------
# Footer: horizontal rule followed by author credit and contact link
# ---------------------------------------------------------------------------
FOOTER_MARKDOWN = (
    "Created by [Ryland Bacorn](https://huggingface.co/ReithBjarkan). "
    "Report a [bug or make a suggestion](mailto:rybacorn@gmail.com)"
)

st.markdown("---")
st.markdown(FOOTER_MARKDOWN)