Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import sys
|
|
| 3 |
import faiss
|
| 4 |
import numpy as np
|
| 5 |
import streamlit as st
|
|
|
|
| 6 |
from text2vec import SentenceModel
|
| 7 |
from src.jsonl_Indexer import JSONLIndexer
|
| 8 |
|
|
@@ -32,8 +33,6 @@ DEFAULT_CONFIG = {
|
|
| 32 |
# 合并默认配置和命令行参数
|
| 33 |
config = DEFAULT_CONFIG.copy()
|
| 34 |
config.update(cli_args)
|
| 35 |
-
|
| 36 |
-
# 将 vector_size 转换为整数
|
| 37 |
config['vector_size'] = int(config['vector_size'])
|
| 38 |
|
| 39 |
@st.cache_resource
|
|
@@ -68,24 +67,55 @@ st.title("JSONL Data Retrieval Visualization")
|
|
| 68 |
st.write("该应用基于预计算的 JSONL 文件 embedding,输入查询后将检索相似记录。")
|
| 69 |
|
| 70 |
# 查询输入
|
| 71 |
-
query = st.text_input("
|
| 72 |
-
top_k = st.slider("
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
# 检索并展示结果
|
| 75 |
-
if st.button("
|
| 76 |
-
#
|
| 77 |
rec_ids, scores = retriever.search_return_id(query, top_k)
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import faiss
|
| 4 |
import numpy as np
|
| 5 |
import streamlit as st
|
| 6 |
+
import pandas as pd
|
| 7 |
from text2vec import SentenceModel
|
| 8 |
from src.jsonl_Indexer import JSONLIndexer
|
| 9 |
|
|
|
|
| 33 |
# Merge the default configuration with the command-line arguments;
# values supplied on the command line override the defaults.
config = dict(DEFAULT_CONFIG)
config.update(cli_args)
# Store vector_size as an int.
config['vector_size'] = int(config['vector_size'])
|
| 37 |
|
| 38 |
@st.cache_resource
|
|
|
|
| 67 |
st.write("该应用基于预计算的 JSONL 文件 embedding,输入查询后将检索相似记录。")
|
| 68 |
|
| 69 |
# Query input: search box and Top-K slider laid out side by side.
col1, col2 = st.columns([2.5, 1])
with col1:
    # Search box.
    query = st.text_input(
        "请输入搜索查询:", placeholder="your query", help=""
    )
with col2:
    # Top-K slider. It must be bound to ``top_k`` because that is the name
    # the search call below reads; binding it to any other name leaves the
    # slider on screen with no effect on the search.
    top_k = st.slider(
        "Top K", 1, 100, 50, help="choose the number of results to display"
    )


def _zebra_rows(column):
    """Return one alternating background-colour CSS rule per cell of *column*."""
    return [
        "background-color: #F7F7F7" if row % 2 == 0 else "background-color: #FFFFFF"
        for row in range(len(column))
    ]


# Run the search and display the results. Nothing happens until the button
# is pressed with a non-empty query.
if st.button("query") and query:
    # Call the retriever, which returns the JSON ``id`` field of each hit
    # together with its similarity score.
    rec_ids, scores = retriever.search_return_id(query, top_k)

    # Build a DataFrame from the search results.
    results_df = pd.DataFrame({
        "tool": rec_ids,
        "relevance": scores,
    })

    st.subheader("🗂️ 检索结果详情")

    # Style the DataFrame with alternating row background colours and show
    # the relevance with four decimal places.
    styled_results = results_df.style.apply(_zebra_rows, axis=0).format(
        {"relevance": "{:.4f}"}
    )

    # Scale the progress bars to the best score of this result set; fall
    # back to 1 when the search returned nothing.
    max_score = float(max(scores)) if len(scores) > 0 else 1

    # Show the results in an interactive table with per-column configuration.
    st.dataframe(
        styled_results,
        column_config={
            "tool": st.column_config.TextColumn(
                "tool", help="检索到的tool", width="medium"
            ),
            "relevance": st.column_config.ProgressColumn(
                "relevance",
                help="记录与查询的匹配程度",
                format="%.4f",
                min_value=0,
                max_value=max_score,
            ),
        },
        hide_index=True,
        use_container_width=True,
    )
|