Flopot2 committed on
Commit
f849f04
·
verified ·
1 Parent(s): 9c11ee2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -3
app.py CHANGED
@@ -6,6 +6,15 @@ from sentence_transformers import SentenceTransformer
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from urllib.parse import urlparse
8
  import re
 
 
 
 
 
 
 
 
 
9
 
10
  st.set_page_config(page_title="404 Redirect Mapper", layout="wide")
11
  st.title("🔁 404 Redirect Mapper")
@@ -72,9 +81,10 @@ if file_404 and file_200:
72
  urls_200, text_200 = prepare(df_200, url_col_200, text_cols_200)
73
 
74
  status_msg.info("🔄 Generating embeddings...")
75
- model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
76
- emb_404 = model.encode(text_404, show_progress_bar=True)
77
- emb_200 = model.encode(text_200, show_progress_bar=True)
 
78
  status_msg.empty()
79
 
80
  sim_matrix = cosine_similarity(emb_404, emb_200)
 
6
  from sklearn.metrics.pairwise import cosine_similarity
7
  from urllib.parse import urlparse
8
  import re
9
+ import os, shutil
10
+
11
+ # Prevent Hugging Face cache from exceeding 50 GB
12
+ os.environ["TRANSFORMERS_CACHE"] = "/tmp"
13
+ os.environ["SENTENCE_TRANSFORMERS_HOME"] = "/tmp"
14
+
15
+ # Optional: clean old cache if Space restarts
16
+ for folder in ["/root/.cache", "/tmp"]:
17
+ shutil.rmtree(folder, ignore_errors=True)
18
 
19
  st.set_page_config(page_title="404 Redirect Mapper", layout="wide")
20
  st.title("🔁 404 Redirect Mapper")
 
81
  urls_200, text_200 = prepare(df_200, url_col_200, text_cols_200)
82
 
83
  status_msg.info("🔄 Generating embeddings...")
84
+ model = SentenceTransformer("all-MiniLM-L6-v2")
85
+ emb_404 = model.encode(text_404, show_progress_bar=True, batch_size=32)
86
+ emb_200 = model.encode(text_200, show_progress_bar=True, batch_size=32)
87
+
88
  status_msg.empty()
89
 
90
  sim_matrix = cosine_similarity(emb_404, emb_200)