| import gradio as gr |
| from wordllama import WordLlama |
|
|
| |
# Load the WordLlama model once at import time; every handler below reuses
# this single module-level instance instead of reloading it per request.
wl = WordLlama.load()
|
|
| |
def calculate_similarity(text1, text2):
    """Return a display string with the WordLlama similarity of two texts.

    Args:
        text1: First text to compare.
        text2: Second text to compare.

    Returns:
        ``"Similarity Score: <score>"`` where ``<score>`` is the value
        returned by ``wl.similarity(text1, text2)`` rendered with default
        f-string formatting.
    """
    return f"Similarity Score: {wl.similarity(text1, text2)}"
|
|
| |
def rank_documents(query, candidates):
    """Rank semicolon-separated candidate documents against a query.

    Args:
        query: Query text the candidates are ranked against.
        candidates: All candidate documents in one string, separated by
            ``";"``. Whitespace around each candidate is stripped and empty
            entries (for example from a trailing ``";"``) are discarded.

    Returns:
        The result of ``wl.rank(query, <cleaned candidates>)``, or an empty
        list when the input contains no non-empty candidate.
    """
    # A bare split(";") would keep padding such as " b" from "a; b" and would
    # produce "" for a trailing separator or an empty textbox; clean both up
    # so only real documents reach the model.
    candidates_list = [
        entry.strip() for entry in candidates.split(";") if entry.strip()
    ]
    if not candidates_list:
        # Nothing to rank; skip the model call entirely.
        return []
    return wl.rank(query, candidates_list)
|
|
| |
def deduplicate_docs(candidates, threshold):
    """Remove near-duplicate documents from a semicolon-separated list.

    Args:
        candidates: All candidate documents in one string, separated by
            ``";"``. Whitespace around each candidate is stripped and empty
            entries (for example from a trailing ``";"``) are discarded.
        threshold: Similarity threshold forwarded unchanged to
            ``wl.deduplicate``.

    Returns:
        The result of ``wl.deduplicate(<cleaned candidates>,
        threshold=threshold)``, or an empty list when the input contains no
        non-empty candidate.
    """
    # A bare split(";") would keep padding such as " b" from "a; b" and would
    # produce "" for a trailing separator or an empty textbox; clean both up
    # so only real documents reach the model.
    candidates_list = [
        entry.strip() for entry in candidates.split(";") if entry.strip()
    ]
    if not candidates_list:
        # Nothing to deduplicate; skip the model call entirely.
        return []
    return wl.deduplicate(candidates_list, threshold=threshold)
|
|
| |
def cluster_docs(docs, k, max_iterations, tolerance):
    """Cluster semicolon-separated documents with WordLlama.

    Args:
        docs: All documents in one string, separated by ``";"``. Whitespace
            around each document is stripped and empty entries (for example
            from a trailing ``";"``) are discarded.
        k: Number of clusters. Coerced to ``int`` before use.
        max_iterations: Iteration cap for the clustering run. Coerced to
            ``int`` before use.
        tolerance: Convergence tolerance, forwarded unchanged.

    Returns:
        Whatever ``wl.cluster`` returns for the cleaned documents and the
        given settings.
    """
    # A bare split(";") would keep padding such as " b" from "a; b" and would
    # produce "" for a trailing separator; only real documents should be
    # clustered.
    docs_list = [entry.strip() for entry in docs.split(";") if entry.strip()]
    # Numeric UI inputs can arrive as floats (e.g. 5.0) depending on how the
    # gr.Number component is configured; counts must be integers.
    return wl.cluster(
        docs_list,
        k=int(k),
        max_iterations=int(max_iterations),
        tolerance=tolerance,
    )
|
|
| |
# Build the tabbed UI: each tab wires one handler function to a button.
with gr.Blocks(css="style.css") as demo:
    gr.Markdown("# WordLlama App")

    # Tab 1: similarity between two free-text inputs.
    with gr.Tab("Similarity"):
        gr.Markdown("### Calculate Similarity between two texts")
        text1 = gr.Textbox(label="Text 1")
        text2 = gr.Textbox(label="Text 2")
        similarity_output = gr.Textbox(label="Similarity Score")
        similarity_button = gr.Button("Calculate Similarity")
        similarity_button.click(
            calculate_similarity,
            inputs=[text1, text2],
            outputs=similarity_output,
        )

    # Tab 2: rank semicolon-separated candidates against a query.
    with gr.Tab("Rank Documents"):
        gr.Markdown("### Rank documents based on a query")
        query = gr.Textbox(label="Query")
        candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        rank_output = gr.JSON(label="Ranked Documents")
        rank_button = gr.Button("Rank Documents")
        rank_button.click(
            rank_documents,
            inputs=[query, candidates],
            outputs=rank_output,
        )

    # Tab 3: fuzzy deduplication of semicolon-separated candidates.
    with gr.Tab("Fuzzy Deduplication"):
        gr.Markdown("### Deduplicate similar documents")
        # NOTE: this rebinds the name `candidates` used by the previous tab.
        # That is harmless because the ranking click handler above was
        # already wired to the earlier Textbox component.
        candidates = gr.Textbox(label="Candidates (separate by semicolons)")
        threshold = gr.Slider(0.0, 1.0, value=0.8, label="Threshold")
        deduplicate_output = gr.JSON(label="Deduplicated Documents")
        deduplicate_button = gr.Button("Deduplicate")
        deduplicate_button.click(
            deduplicate_docs,
            inputs=[candidates, threshold],
            outputs=deduplicate_output,
        )

    # Tab 4: clustering of semicolon-separated documents.
    with gr.Tab("Clustering"):
        gr.Markdown("### Cluster documents")
        docs = gr.Textbox(label="Documents (separate by semicolons)")
        # precision=0 makes Gradio round these inputs and pass them to the
        # handler as ints; cluster and iteration counts must be whole numbers.
        k = gr.Number(label="Number of Clusters", value=5, precision=0)
        max_iterations = gr.Number(
            label="Max Iterations", value=100, precision=0
        )
        tolerance = gr.Number(label="Tolerance", value=1e-4)
        cluster_output = gr.JSON(label="Clusters")
        cluster_button = gr.Button("Cluster Documents")
        cluster_button.click(
            cluster_docs,
            inputs=[docs, k, max_iterations, tolerance],
            outputs=cluster_output,
        )


# Start the app; this runs whenever the module is executed.
demo.launch()
|
|