croeasusking committed on
Commit
91c172b
·
verified ·
1 Parent(s): 7fba2ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py CHANGED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ from datetime import datetime
4
+ from sentence_transformers import SentenceTransformer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+
7
# Load the article dataset. The path is relative, so the CSV must be in the
# directory the app is started from.
df = pd.read_csv("analytics_vidhya_articles.csv", parse_dates=["Date"])

# Text used for similarity search: title and description joined by a space.
# Missing values are replaced with "" first, because astype(str) alone would
# turn NaN into the literal word "nan" and feed it to the embedding model.
df["combined_text"] = (
    df["Title"].fillna("").astype(str)
    + " "
    + df["Description"].fillna("").astype(str)
)

# Sentence-embedding model, loaded once at import time.
model = SentenceTransformer("all-MiniLM-L6-v2")
15
+
16
# Embeddings for every article, computed lazily on first use and then reused.
# Re-encoding the corpus on each query is by far the most expensive step.
_corpus_embeddings = None


def _get_corpus_embeddings():
    """Return embeddings for every row of ``df``, encoding them only once."""
    global _corpus_embeddings
    if _corpus_embeddings is None:
        _corpus_embeddings = model.encode(
            df["combined_text"].tolist(), convert_to_tensor=False
        )
    return _corpus_embeddings


def retrieve_records(query, min_date, top_n):
    """Return the articles most similar to *query* dated on or after *min_date*.

    Args:
        query: Free-text search string. Blank or ``None`` yields no results.
        min_date: Earliest article date to include; any value accepted by
            ``pd.to_datetime``.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the columns Title, Description, Date and Link,
        ordered by descending cosine similarity to the query. It is empty
        when the query is blank or no article passes the date filter.
    """
    result_columns = ["Title", "Description", "Date", "Link"]

    # Check the query first so a blank search never touches the dataset or
    # the model.
    if not query or not query.strip():
        return pd.DataFrame(columns=result_columns)

    # Filter by date
    date_mask = (df["Date"] >= pd.to_datetime(min_date)).to_numpy()
    if not date_mask.any():
        return pd.DataFrame(columns=result_columns)

    # NOTE(review): relies on encode() returning a NumPy array when
    # convert_to_tensor=False, so it can be indexed with a boolean mask.
    text_embeddings = _get_corpus_embeddings()[date_mask]

    # Encoding a one-element list already yields a 2-D (1, dim) array, which
    # is the shape cosine_similarity expects; wrapping it in another list
    # would make it 3-D and raise.
    query_embedding = model.encode([query], convert_to_tensor=False)
    scores = cosine_similarity(query_embedding, text_embeddings)[0]

    # assign() returns a new frame, so the module-level df is not modified.
    ranked = df[date_mask].assign(similarity=scores)

    # Return top-N results; the slider value may arrive as a float.
    top_results = ranked.sort_values(by="similarity", ascending=False).head(int(top_n))
    return top_results[result_columns]
36
+
37
# Web UI: a query box, a minimum-date box pre-filled with today's date, and a
# slider for the result count, wired to retrieve_records.
iface = gr.Interface(
    title="Top-N Article Retriever",
    description="Search articles using Title and Description similarity, filtered by a minimum date.",
    fn=retrieve_records,
    inputs=[
        gr.Textbox(label="Enter your query"),
        gr.Textbox(
            label="Minimum date (YYYY-MM-DD)",
            value=datetime.today().date().isoformat(),
        ),
        gr.Slider(minimum=5, maximum=15, value=5, step=5, label="Top N results"),
    ],
    outputs=gr.Dataframe(label="Top Similar Records"),
)

# Start the app as soon as the module is executed.
iface.launch()