| | import streamlit as st |
| | from models.quotes_search_engine import QuoteSearchEngine |
| | from models.data_reader import load_quotes_from_csv |
| |
|
| | |
@st.cache_data
def load_quotes():
    """Load the quotes dataset from the hosted quotes-500k CSV.

    The path is handed to ``load_quotes_from_csv`` and its result is returned
    unchanged. The function is wrapped in ``st.cache_data`` so the load is
    not repeated on every script rerun.
    """
    return load_quotes_from_csv("hf://datasets/jstet/quotes-500k/quotes.csv")
| |
|
| | |
@st.cache_resource
def get_search_engine():
    """Create a ``QuoteSearchEngine`` and fill it with the loaded quotes.

    Wrapped in ``st.cache_resource`` so the engine is built once and reused
    instead of being rebuilt on each script rerun.
    """
    engine = QuoteSearchEngine()
    # The engine is constructed first, then the quotes are loaded and indexed.
    engine.add_quotes_to_vector_database(load_quotes())
    return engine
| |
|
| | |
# Shared engine instance used by both button handlers below.
search_engine = get_search_engine()

# Page header.
st.title("Quote Search Engine")
st.write("Search for similar quotes using the local search engine.")

# Free-text query shared by both actions on the page.
query_input = st.text_input("Enter your quote or phrase:")

# How many results the top-k search should return (1 to 10, default 3).
k = st.number_input(
    "Number of similar quotes to retrieve:",
    min_value=1,
    max_value=10,
    value=3,
)
| |
|
| | |
# Top-k search: runs only on the rerun triggered by clicking the button.
if st.button("Search Quotes"):
    # Strip first so a whitespace-only entry is rejected like an empty one
    # instead of being sent to the search engine as a meaningless query.
    query = query_input.strip()
    if not query:
        st.error("Please enter a quote or phrase.")
    else:
        print(f'Search engine is searching the most similar quotes for query {query}')
        # int() guards against the widget handing back a non-int numeric type.
        similar_quotes, distances = search_engine.most_similar(query, top_k=int(k))
        print(f'Those are: {similar_quotes}, {distances}')

        st.write("Search Results: ")
        # NOTE(review): the engine's values are named "distances" but are shown
        # as "Cosine similarity" -- confirm which one most_similar returns.
        for rank, (quote, score) in enumerate(zip(similar_quotes, distances), start=1):
            st.write(f"{rank}. Quote: {quote}, Cosine similarity: {score:.3f}")
            print(f'Those are: {quote}, {score}')
| |
|
| | |
# Section divider and heading for the raw-similarity view.
st.write("---")
st.write("### Vector Similarities")

# Similarities against the engine's index: runs only when the button is clicked.
if st.button("Retrieve All Vector Similarities"):
    # Strip first so a whitespace-only entry is rejected like an empty one
    # instead of being embedded as a meaningless query.
    query = query_input.strip()
    if not query:
        st.error("Please enter a quote or phrase.")
    else:
        # encode() is given a one-element list holding the query.
        query_embedding = search_engine.model.encode([query])
        all_similarities = search_engine.cosine_similarity(
            query_embedding, search_engine.index
        )
        st.write(f"Vector Similarities: {all_similarities}")