Spaces:

croeasusking
/

Articles_Retrieve

Sleeping

App Files Community

croeasusking committed on May 6, 2025

Commit

685c014

verified ·

1 Parent(s): 34a3208

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -15

app.py CHANGED Viewed

@@ -14,6 +14,9 @@ df["combined_text"] = df["Title"].astype(str) + " " + df["Description"].astype(s
 # Load model
 model = SentenceTransformer("all-MiniLM-L6-v2")
 # Function to retrieve top-N records
 # def retrieve_records(query, top_n):
 #     text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
@@ -26,23 +29,22 @@ model = SentenceTransformer("all-MiniLM-L6-v2")
 #     return top_results[["Title", "Description", "Date", "Link", 'similarity']]
-def retrieve_records_html(query, min_date, top_n):
-    text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
     query_embedding = model.encode([query], convert_to_tensor=False)
     scores = cosine_similarity(query_embedding, text_embeddings).flatten()
     df["similarity"] = scores
     top_results = df.sort_values(by=['similarity', 'Date'], ascending=[False, False]).head(top_n)
-    html = "<table><tr><th>Title</th><th>Description</th><th>Date</th><th>Link</th></tr>"
     for _, row in top_results.iterrows():
-        html += f"<tr><td>{row['Title']}</td>"
-        html += f"<td>{row['Description']}</td>"
-        html += f"<td>{row['Date'].strftime('%Y-%m-%d')}</td>"
-        html += f"<td><a href='{row['Link']}' target='_blank'>Open Link</a></td></tr>"
-    html += "</table>"
-    return html
@@ -62,17 +64,16 @@ def retrieve_records_html(query, min_date, top_n):
 iface = gr.Interface(
-    fn=retrieve_records_html,
     inputs=[
         gr.Textbox(label="Enter your query"),
         gr.Slider(5, 15, step=5, label="Top N results")
     ],
-    outputs=gr.HTML(),
-    title="Top Article Retriever",
-    description="Search articles using query."
 )
 if __name__ == "__main__":
     iface.launch()

 # Load model
 model = SentenceTransformer("all-MiniLM-L6-v2")
+text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
 # Function to retrieve top-N records
 # def retrieve_records(query, top_n):
 #     text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
 #     return top_results[["Title", "Description", "Date", "Link", 'similarity']]
+def retrieve_records(query, min_date, top_n):
     query_embedding = model.encode([query], convert_to_tensor=False)
     scores = cosine_similarity(query_embedding, text_embeddings).flatten()
     df["similarity"] = scores
     top_results = df.sort_values(by=['similarity', 'Date'], ascending=[False, False]).head(top_n)
+    markdown_output = ""
     for _, row in top_results.iterrows():
+        markdown_output += f"### [{row['Title']}]({row['Link']})\n"
+        markdown_output += f"**Date**: {row['Date'].strftime('%Y-%m-%d')}\n\n"
+        markdown_output += f"{row['Description']}\n\n---\n"
+    return markdown_output
 iface = gr.Interface(
+    fn=retrieve_records,
     inputs=[
         gr.Textbox(label="Enter your query"),
         gr.Slider(5, 15, step=5, label="Top N results")
     ],
+    outputs=gr.Markdown(label="Top Similar Records"),
+    title="Top-N Article Retriever with Clickable Links"
 )
+iface.launch()
 if __name__ == "__main__":
     iface.launch()