croeasusking committed on
Commit
1648d60
·
verified ·
1 Parent(s): 734bc2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -23
app.py CHANGED
@@ -13,11 +13,11 @@ df = pd.read_csv("analyticsvidhyacomplete.csv", parse_dates=["Date"])
13
  df['Date'] = pd.to_datetime(df['Date'], format='mixed', dayfirst=True, errors='coerce')
14
  df["combined_text"] = df["Title"].astype(str) + " " + df["Description"].astype(str) + " " + df["Content"].astype(str)
15
 
16
- # Load query CSV with columns: Topic, Subtopic, TopN
17
  query_df = pd.read_csv("query.csv")
18
  query_df.dropna(subset=["Topic", "Subtopic", "TopN"], inplace=True)
19
 
20
- # Build dropdown options
21
  query_df["QueryOption"] = query_df.apply(
22
  lambda row: f"{row['Topic']} - {row['Subtopic']} (TopN: {int(row['TopN'])})", axis=1
23
  )
@@ -33,14 +33,7 @@ text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=F
33
 
34
 
35
  def retrieve_records(selected_query):
36
- # query_embedding = model.encode([query], convert_to_tensor=False)
37
-
38
- # scores = cosine_similarity(query_embedding, text_embeddings).flatten()
39
- # df["similarity"] = scores
40
-
41
- # top_results = df.sort_values(by=['similarity', 'Date'], ascending=[False, False]).head(top_n)
42
 
43
- # Extract Topic, Subtopic, and TopN from dropdown text
44
  match = re.match(r"(.+?) - (.+?) \(TopN: (\d+)\)", selected_query)
45
  if not match:
46
  return "Invalid query format selected."
@@ -69,8 +62,6 @@ def retrieve_records(selected_query):
69
 
70
 
71
 
72
-
73
-
74
  iface = gr.Interface(
75
  fn=retrieve_records,
76
  inputs=[
@@ -81,17 +72,5 @@ iface = gr.Interface(
81
  )
82
 
83
 
84
- # iface = gr.Interface(
85
- # fn=retrieve_records,
86
- # inputs=[
87
- # gr.Textbox(label="Enter your query"),
88
- # gr.Slider(5, 15, step=5, label="Top N results")
89
- # ],
90
- # outputs=gr.Markdown(label="Top Similar Records"),
91
- # title="Top-N Article Retriever with Clickable Links"
92
- # )
93
-
94
-
95
-
96
  if __name__ == "__main__":
97
  iface.launch()
 
13
  df['Date'] = pd.to_datetime(df['Date'], format='mixed', dayfirst=True, errors='coerce')
14
  df["combined_text"] = df["Title"].astype(str) + " " + df["Description"].astype(str) + " " + df["Content"].astype(str)
15
 
16
+ # Loading query CSV
17
  query_df = pd.read_csv("query.csv")
18
  query_df.dropna(subset=["Topic", "Subtopic", "TopN"], inplace=True)
19
 
20
+ # dropdown options
21
  query_df["QueryOption"] = query_df.apply(
22
  lambda row: f"{row['Topic']} - {row['Subtopic']} (TopN: {int(row['TopN'])})", axis=1
23
  )
 
33
 
34
 
35
  def retrieve_records(selected_query):
 
 
 
 
 
 
36
 
 
37
  match = re.match(r"(.+?) - (.+?) \(TopN: (\d+)\)", selected_query)
38
  if not match:
39
  return "Invalid query format selected."
 
62
 
63
 
64
 
 
 
65
  iface = gr.Interface(
66
  fn=retrieve_records,
67
  inputs=[
 
72
  )
73
 
74
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  if __name__ == "__main__":
76
  iface.launch()