Spaces:
Sleeping
Sleeping
sickcell69
committed on
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,9 +1,8 @@
|
|
| 1 |
-
|
| 2 |
import pandas as pd
|
| 3 |
from sentence_transformers import SentenceTransformer, util
|
| 4 |
import torch
|
| 5 |
|
| 6 |
-
|
| 7 |
# 載入語義搜索模型
|
| 8 |
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
|
| 9 |
model = SentenceTransformer(model_checkpoint)
|
|
@@ -16,27 +15,25 @@ data = pd.read_json(data_path)
|
|
| 16 |
embeddings_path = 'corpus_embeddings.pt'
|
| 17 |
corpus_embeddings = torch.load(embeddings_path)
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
@app.route('/')
|
| 22 |
-
def home():
|
| 23 |
-
return render_template('index.html')
|
| 24 |
-
|
| 25 |
-
@app.route('/search', methods=['GET'])
|
| 26 |
-
def search():
|
| 27 |
-
query = request.args.get('query')
|
| 28 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
| 29 |
search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)
|
| 30 |
-
|
| 31 |
results = []
|
| 32 |
for hit in search_hits[0]:
|
| 33 |
text = " ".join(data.iloc[hit['corpus_id']]['tokens'])
|
| 34 |
-
results.append({
|
| 35 |
-
"text": text,
|
| 36 |
-
"score": hit['score']
|
| 37 |
-
})
|
| 38 |
|
| 39 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
if __name__ == "__main__":
|
| 42 |
-
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
from sentence_transformers import SentenceTransformer, util
|
| 4 |
import torch
|
| 5 |
|
|
|
|
| 6 |
# 載入語義搜索模型
|
| 7 |
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
|
| 8 |
model = SentenceTransformer(model_checkpoint)
|
|
|
|
| 15 |
embeddings_path = 'corpus_embeddings.pt'
|
| 16 |
corpus_embeddings = torch.load(embeddings_path)
|
| 17 |
|
| 18 |
+
def semantic_search(query):
    """Return the five corpus entries most similar to *query*.

    Encodes the query with the module-level SentenceTransformer ``model``,
    scores it against the precomputed ``corpus_embeddings`` tensor, and
    formats each hit as ``"Score: <score> - Text: <text>"``, one per line.

    NOTE(review): assumes ``data`` is a pandas DataFrame whose rows carry a
    ``'tokens'`` list aligned with ``corpus_embeddings`` — loaded elsewhere
    in this file; confirm against the full source.
    """
    # Embed with the same model that produced the corpus embeddings so the
    # similarity scores are meaningful.
    encoded_query = model.encode(query, convert_to_tensor=True)
    # util.semantic_search returns one hit-list per query; we pass a single
    # query, so take the first (and only) list.
    top_hits = util.semantic_search(encoded_query, corpus_embeddings, top_k=5)[0]
    formatted = [
        f"Score: {hit['score']:.4f} - Text: {' '.join(data.iloc[hit['corpus_id']]['tokens'])}"
        for hit in top_hits
    ]
    return "\n".join(formatted)
|
| 28 |
+
|
| 29 |
+
# Wire the search function into a minimal text-in / text-out Gradio UI.
iface = gr.Interface(
    fn=semantic_search,
    inputs="text",
    outputs="text",
    title="語義搜索應用",
    description="輸入一個查詢,然後模型將返回最相似的結果。",
)

if __name__ == "__main__":
    # NOTE(review): a plain iface.launch() reportedly failed to render the
    # page; share=True is used as a workaround — confirm in deployment.
    iface.launch(share=True)
|