Spaces:
Runtime error
Runtime error
| import logging | |
| import datasets | |
| from findkit import indexes | |
| import gradio as gr | |
# Configure the root logger once at import time so INFO-level messages
# emitted while the app runs are shown.
logging.basicConfig(level=logging.INFO)
def get_html_retrieval_results(retrieval_result, show_only_one_match_per_episode):
    """Render retrieval hits as an HTML table.

    Args:
        retrieval_result: pandas DataFrame of hits; must contain an
            ``episode`` column when de-duplication is requested.
        show_only_one_match_per_episode: when truthy, keep a single row per
            distinct ``episode`` value (``drop_duplicates`` keeps the first
            occurrence by default).

    Returns:
        The table as an HTML string (index column omitted, URLs rendered as
        links), or ``""`` when there are no rows left to show.
    """
    hits = retrieval_result
    if show_only_one_match_per_episode:
        hits = hits.drop_duplicates(subset=["episode"])

    # Nothing to display: return an empty string rather than an empty table.
    if len(hits) == 0:
        return ""

    return hits.to_html(render_links=True, index=False)
def get_retrieval_results(findkit_index, query, n_retrieved_results):
    """Query the index and label the score column as a BM25 score.

    Args:
        findkit_index: object exposing ``find_similar(query, n)``; the result
            is used as a pandas DataFrame (presumably with a ``distance``
            column -- confirm against findkit).
        query: the search text passed through to the index.
        n_retrieved_results: number of results requested from the index.

    Returns:
        The DataFrame returned by ``find_similar`` with its ``distance``
        column renamed to ``bm25_score``. If there is no such column the
        frame is returned with its columns unchanged.
    """
    retrieval_results_df = findkit_index.find_similar(query, n_retrieved_results)
    # A bare mapping passed to DataFrame.rename targets the *index* labels
    # (axis 0), which silently left the "distance" column untouched.
    # ``columns=`` makes the rename apply to the column as intended.
    return retrieval_results_df.rename(columns={"distance": "bm25_score"})
def setup_df():
    """Load the podcast dataset's ``train`` split as a pandas DataFrame.

    Returns:
        The split converted with ``to_pandas()``, with every row that
        contains a missing value dropped.
    """
    train_split = datasets.load_dataset("lambdaofgod/lex_fridman_podcast")["train"]
    return train_split.to_pandas().dropna()
def setup_index():
    """Build the in-memory BM25 index over the podcast DataFrame.

    Returns:
        The object produced by ``indexes.InMemoryBM25Index.build``, given the
        ``text`` column as the first argument and the full DataFrame as the
        second.
    """
    podcast_df = setup_df()
    texts = podcast_df["text"]
    return indexes.InMemoryBM25Index.build(texts, podcast_df)
# Built once at import time; show_retrieval_results reads this module-level
# index on every request instead of rebuilding it.
findkit_index = setup_index()
def show_retrieval_results(query, n_retrieved_results, show_only_one_match_per_episode):
    """Callback for the Gradio interface: search the index and return HTML.

    Args:
        query: search text entered by the user.
        n_retrieved_results: how many results to request from the index.
        show_only_one_match_per_episode: whether to collapse results to one
            row per episode before rendering.

    Returns:
        The HTML string produced by ``get_html_retrieval_results`` for the
        hits found in the module-level ``findkit_index``.
    """
    hits_df = get_retrieval_results(findkit_index, query, n_retrieved_results)
    html = get_html_retrieval_results(hits_df, show_only_one_match_per_episode)
    return html
# --- Gradio UI wiring -------------------------------------------------------
# Input components; their order in ``inputs`` below must match the positional
# parameters of show_retrieval_results (query, n_retrieved_results,
# show_only_one_match_per_episode).

# Toggle for collapsing results to one row per episode (off by default).
show_only_one_match_per_episode = gr.Checkbox(
    label="show only one match per episode", value=False
)
# Number of results to request; precision=0 asks Gradio for an integer value.
n_retrieved_results = gr.Number(label="number of results", value=10, precision=0)
# Free-text query box, pre-filled with an example query.
query = gr.Textbox(label="input query", value="artificial life")
# The callback's return value is displayed through the "html" output.
demo = gr.Interface(
    fn=show_retrieval_results,
    inputs=[query, n_retrieved_results, show_only_one_match_per_episode],
    outputs="html",
)
# Launches the app as a module-level side effect (there is no __main__ guard).
demo.launch()