Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,15 @@ from sentence_transformers import SentenceTransformer
|
|
| 6 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 7 |
from urllib.parse import urlparse
|
| 8 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
st.set_page_config(page_title="404 Redirect Mapper", layout="wide")
|
| 11 |
st.title("🔁 404 Redirect Mapper")
|
|
@@ -72,9 +81,10 @@ if file_404 and file_200:
|
|
| 72 |
urls_200, text_200 = prepare(df_200, url_col_200, text_cols_200)
|
| 73 |
|
| 74 |
status_msg.info("🔄 Generating embeddings...")
|
| 75 |
-
model = SentenceTransformer("
|
| 76 |
-
emb_404 = model.encode(text_404, show_progress_bar=True)
|
| 77 |
-
emb_200 = model.encode(text_200, show_progress_bar=True)
|
|
|
|
| 78 |
status_msg.empty()
|
| 79 |
|
| 80 |
sim_matrix = cosine_similarity(emb_404, emb_200)
|
|
|
|
| 6 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 7 |
from urllib.parse import urlparse
|
| 8 |
import re
|
| 9 |
+
import os, shutil
|
| 10 |
+
|
| 11 |
+
# Redirect the Hugging Face / sentence-transformers model caches to /tmp so the default cache does not grow past 50 GB
|
| 12 |
+
os.environ["TRANSFORMERS_CACHE"] = "/tmp"
|
| 13 |
+
os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp"
|
| 14 |
+
|
| 15 |
+
# Optional: delete old cache directories each time this script runs (NOTE: this also wipes /tmp, the cache location set above)
|
| 16 |
+
for folder in ["/root/.cache", "/tmp"]:
|
| 17 |
+
shutil.rmtree(folder, ignore_errors=True)
|
| 18 |
|
| 19 |
st.set_page_config(page_title="404 Redirect Mapper", layout="wide")
|
| 20 |
st.title("🔁 404 Redirect Mapper")
|
|
|
|
| 81 |
urls_200, text_200 = prepare(df_200, url_col_200, text_cols_200)
|
| 82 |
|
| 83 |
status_msg.info("🔄 Generating embeddings...")
|
| 84 |
+
model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 85 |
+
emb_404 = model.encode(text_404, show_progress_bar=True, batch_size=32)
|
| 86 |
+
emb_200 = model.encode(text_200, show_progress_bar=True, batch_size=32)
|
| 87 |
+
|
| 88 |
status_msg.empty()
|
| 89 |
|
| 90 |
sim_matrix = cosine_similarity(emb_404, emb_200)
|