# retriever / app.py
# Last commit: 6488af3 ("fix") by Yyy0530
import hashlib
import html
import os
import pickle
import tempfile
from typing import List, Tuple

import faiss
import numpy as np
import pandas as pd
import streamlit as st
from text2vec import SentenceModel
from tqdm import tqdm

from src.ExcelIndexer import ExcelIndexer
# Streamlit app: page title and a one-line usage hint.
st.title("Excel Data Retrieval Visualization")
st.write("Upload an Excel file and enter a query to retrieve similar entries.")

# Default model path and retrieval parameters used further down the script:
#   model_path  -> passed to SentenceModel
#   vector_size -> passed to ExcelIndexer as ``vector_sz``
#   id_column   -> column name passed to ``load_excel``
DEFAULT_CONFIG = dict(
    model_path="shibing624/text2vec-base-chinese",
    vector_size=768,
    id_column="描述",
)
# Cache the SentenceModel so Streamlit reruns reuse the same instance.
@st.cache_resource
def get_model(model_path: str = DEFAULT_CONFIG['model_path']):
    """Return a ``SentenceModel`` built from ``model_path``.

    The function is wrapped in ``st.cache_resource``, so the model is
    constructed only on a cache miss for the given ``model_path``.

    Args:
        model_path: Model identifier or path handed to ``SentenceModel``.

    Returns:
        The ``SentenceModel`` instance.
    """
    return SentenceModel(model_path)
# Initialise the embedding model (served from the resource cache after the
# first run).
model = get_model()

# Let the user upload the workbook; only .xlsx files are accepted.
uploaded_file = st.file_uploader(
    "Upload your Excel file (xlsx format):",
    type=["xlsx"],
)
# Retriever factory, cached per (vector_sz, dataset_path, id_column).
@st.cache_resource
def create_retriever(vector_sz: int, dataset_path: str, id_column: str, _model):
    """Create an ``ExcelIndexer`` and load the workbook at ``dataset_path``.

    ``st.cache_resource`` builds its cache key from the hashable arguments;
    ``_model`` is prefixed with an underscore so Streamlit skips hashing it.
    Because of that, ``dataset_path`` must be unique per uploaded content,
    otherwise a previously built retriever is returned for a new file.

    Args:
        vector_sz: Forwarded to ``ExcelIndexer`` as ``vector_sz``.
        dataset_path: Path of the ``.xlsx`` file handed to ``load_excel``.
        id_column: Column name handed to ``load_excel``.
        _model: Model object forwarded to ``ExcelIndexer`` (not hashed).

    Returns:
        The ``ExcelIndexer`` instance after ``load_excel`` has been called.
    """
    indexer = ExcelIndexer(vector_sz=vector_sz, model=_model, embeddings_file=None)
    indexer.load_excel(dataset_path, id_column)
    return indexer


# A file has been uploaded: index it and show the search UI.
if uploaded_file:
    try:
        # getvalue() returns the whole payload regardless of the current
        # stream position, unlike read(), which can yield b"" once consumed.
        file_bytes = uploaded_file.getvalue()

        # Name the temp file after a hash of its content. A fixed name made
        # the cache key constant (so a new upload silently reused the old
        # index) and let concurrent sessions overwrite each other's file.
        digest = hashlib.sha256(file_bytes).hexdigest()
        dataset_path = os.path.join(
            tempfile.gettempdir(), f"uploaded_file_{digest}.xlsx"
        )
        # Skip the write when an identical, complete copy is already on disk.
        already_saved = (
            os.path.isfile(dataset_path)
            and os.path.getsize(dataset_path) == len(file_bytes)
        )
        if not already_saved:
            with open(dataset_path, "wb") as f:
                f.write(file_bytes)

        # Build the retriever (or fetch it from the resource cache).
        retriever = create_retriever(
            vector_sz=DEFAULT_CONFIG['vector_size'],
            dataset_path=dataset_path,
            id_column=DEFAULT_CONFIG['id_column'],
            _model=model,
        )
        st.success("File uploaded and processed successfully!")

        # Query input and number of results to show.
        query = st.text_input("Enter a search query:")
        top_k = st.slider(
            "Select number of results to display", min_value=1, max_value=100, value=5
        )

        # Run the search and render one card per hit.
        if st.button("Search") and query:
            texts, scores = retriever.search_return_text(query, top_k)
            st.write("### Results:")
            with st.expander("检索结果列表 (点击展开)"):
                for rank, (text, score) in enumerate(zip(texts, scores), start=1):
                    # The text comes from the uploaded workbook, i.e. untrusted
                    # input, and is rendered with unsafe_allow_html=True, so it
                    # must be HTML-escaped. Newlines become <br> so a blank
                    # line inside a cell cannot terminate the HTML block.
                    safe_text = html.escape(str(text)).replace("\n", "<br>")
                    # Built without leading indentation so Markdown can never
                    # interpret the markup as an indented code block.
                    card_html = (
                        '<div style="border:1px solid #ccc; padding:10px; '
                        'border-radius:5px; margin-bottom:10px; '
                        'background-color:#f9f9f9;">'
                        f"<p><b>Text {rank}:</b> {safe_text}</p>"
                        f"<p><b>Score:</b> {score:.4f}</p>"
                        "</div>"
                    )
                    st.markdown(card_html, unsafe_allow_html=True)
    except Exception as e:
        # Top-level UI boundary: report the failure in the page instead of
        # crashing the script run.
        st.error(f"An error occurred: {e}")
else:
    st.info("Please upload an Excel file to proceed.")