Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,6 +14,9 @@ df["combined_text"] = df["Title"].astype(str) + " " + df["Description"].astype(s
|
|
| 14 |
# Load model
|
| 15 |
model = SentenceTransformer("all-MiniLM-L6-v2")
|
| 16 |
|
|
|
|
|
|
|
|
|
|
| 17 |
# Function to retrieve top-N records
|
| 18 |
# def retrieve_records(query, top_n):
|
| 19 |
# text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
|
|
@@ -26,23 +29,22 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
| 26 |
# return top_results[["Title", "Description", "Date", "Link", 'similarity']]
|
| 27 |
|
| 28 |
|
| 29 |
-
def
|
| 30 |
-
text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
|
| 31 |
query_embedding = model.encode([query], convert_to_tensor=False)
|
| 32 |
-
|
| 33 |
scores = cosine_similarity(query_embedding, text_embeddings).flatten()
|
| 34 |
df["similarity"] = scores
|
| 35 |
|
| 36 |
top_results = df.sort_values(by=['similarity', 'Date'], ascending=[False, False]).head(top_n)
|
| 37 |
|
| 38 |
-
|
| 39 |
for _, row in top_results.iterrows():
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
|
| 47 |
|
| 48 |
|
|
@@ -62,17 +64,16 @@ def retrieve_records_html(query, min_date, top_n):
|
|
| 62 |
|
| 63 |
|
| 64 |
iface = gr.Interface(
|
| 65 |
-
fn=
|
| 66 |
inputs=[
|
| 67 |
gr.Textbox(label="Enter your query"),
|
| 68 |
gr.Slider(5, 15, step=5, label="Top N results")
|
| 69 |
],
|
| 70 |
-
outputs=gr.
|
| 71 |
-
title="Top Article Retriever"
|
| 72 |
-
description="Search articles using query."
|
| 73 |
)
|
| 74 |
|
| 75 |
-
|
| 76 |
|
| 77 |
if __name__ == "__main__":
|
| 78 |
iface.launch()
|
|
|
|
| 14 |
# Load the sentence-embedding model once, at import time.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Encode every record's combined text up front so that individual queries
# only need to embed the query string itself.
text_embeddings = model.encode(
    df["combined_text"].tolist(),
    convert_to_tensor=False,
)
|
| 18 |
+
|
| 19 |
+
|
| 20 |
# Function to retrieve top-N records
|
| 21 |
# def retrieve_records(query, top_n):
|
| 22 |
# text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
|
|
|
|
| 29 |
# return top_results[["Title", "Description", "Date", "Link", 'similarity']]
|
| 30 |
|
| 31 |
|
| 32 |
+
def retrieve_records(query, min_date, top_n):
    """Return the records most similar to *query*, rendered as Markdown.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed ``text_embeddings``. Records
    are ordered by similarity, then by ``Date``, both descending.

    Args:
        query: Free-text search string.
        min_date: Accepted for signature compatibility; this function does
            not filter on it.
        top_n: Maximum number of records to include.

    Returns:
        A Markdown string with one section per record: a linked title, the
        date, and the description, each followed by a horizontal rule.
        Empty string when there are no records.
    """
    query_embedding = model.encode([query], convert_to_tensor=False)
    scores = cosine_similarity(query_embedding, text_embeddings).flatten()

    # Attach the scores to a copy instead of writing a column into the shared
    # module-level ``df``; otherwise overlapping requests could overwrite each
    # other's similarity values between scoring and sorting.
    ranked = df.assign(similarity=scores).sort_values(
        by=["similarity", "Date"], ascending=[False, False]
    )

    # ``head`` needs an integer count; the UI value may arrive as a float
    # (NOTE(review): confirm against the Gradio version in use).
    top_results = ranked.head(int(top_n))

    # Assumes ``Date`` holds datetime-like values that support ``strftime``
    # -- TODO confirm where ``df`` is loaded.
    sections = [
        f"### [{row['Title']}]({row['Link']})\n"
        f"**Date**: {row['Date'].strftime('%Y-%m-%d')}\n\n"
        f"{row['Description']}\n\n---\n"
        for _, row in top_results.iterrows()
    ]
    return "".join(sections)
|
| 47 |
+
|
| 48 |
|
| 49 |
|
| 50 |
|
|
|
|
| 64 |
|
| 65 |
|
| 66 |
def _retrieve_for_ui(query, top_n):
    """Adapt the two UI inputs to ``retrieve_records``' three-parameter signature."""
    # The form has no date control, so no minimum date is supplied.
    return retrieve_records(query, None, top_n)


# Gradio calls ``fn`` with one positional argument per input component, so the
# callback must take exactly (query, top_n). Passing ``retrieve_records``
# directly would leave its third parameter unfilled.
iface = gr.Interface(
    fn=_retrieve_for_ui,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Slider(5, 15, step=5, label="Top N results"),
    ],
    outputs=gr.Markdown(label="Top Similar Records"),
    title="Top-N Article Retriever with Clickable Links",
)
|
| 75 |
|
| 76 |
+
# Start the app exactly once, and only when this file is run as a script.
# A second, unguarded ``iface.launch()`` would also start a server on import
# and launch again after the first call returns.
if __name__ == "__main__":
    iface.launch()
|