Spaces:
Sleeping
Sleeping
sickcell69 committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,15 +23,22 @@ embeddings_path = 'corpus_embeddings.pt'
|
|
| 23 |
corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
|
| 24 |
|
| 25 |
def semantic_search(query):
|
|
|
|
|
|
|
| 26 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
| 27 |
-
search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=
|
| 28 |
|
| 29 |
results = []
|
| 30 |
for hit in search_hits[0]:
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
return
|
| 35 |
|
| 36 |
iface = gr.Interface(
|
| 37 |
fn=semantic_search,
|
|
|
|
| 23 |
corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
|
| 24 |
|
| 25 |
def semantic_search(query):
|
| 26 |
+
print("Data columns:", data.columns)
|
| 27 |
+
print("First few rows:", data.head())
|
| 28 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
| 29 |
+
search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)
|
| 30 |
|
| 31 |
results = []
|
| 32 |
for hit in search_hits[0]:
|
| 33 |
+
# 使用更安全的方法來訪問數據
|
| 34 |
+
row = data.iloc[hit['corpus_id']]
|
| 35 |
+
if 'tokens' in row:
|
| 36 |
+
text = " ".join(row['tokens'])
|
| 37 |
+
else:
|
| 38 |
+
text = str(row) # 如果沒有 'tokens',就轉換整行為字符串
|
| 39 |
+
results.append((hit['score'], text))
|
| 40 |
|
| 41 |
+
return results
|
| 42 |
|
| 43 |
iface = gr.Interface(
|
| 44 |
fn=semantic_search,
|