croeasusking committed on
Commit
70be1c8
·
verified ·
1 Parent(s): 8822716

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -27
app.py CHANGED
@@ -4,23 +4,27 @@ from datetime import datetime
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
 
7
- # Load article dataset
8
- df = pd.read_csv("analyticsvidhyacomplete.csv", parse_dates=["Date"])
9
- df["Date"] = pd.to_datetime(df["Date"], format='mixed', dayfirst=True, errors='coerce')
10
- df["combined_text"] = (
11
- df["Title"].astype(str) + " " +
12
- df["Description"].astype(str) + " " +
13
- df["Content"].astype(str)
14
- )
15
-
16
- # Load SentenceTransformer model and compute embeddings once
17
- model = SentenceTransformer("all-MiniLM-L6-v2")
18
- text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
19
-
20
- # Store results globally
21
  results_dict = {}
22
 
23
- # Create markdown from results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def format_markdown(top_results):
26
  markdown_output = ""
@@ -29,25 +33,23 @@ def format_markdown(top_results):
29
  link = row['Link']
30
  desc = row['Description']
31
  date_str = row['Date'].strftime('%Y-%m-%d') if pd.notnull(row['Date']) else 'N/A'
32
-
33
  markdown_output += f"### [{title}]({link})\n"
34
  markdown_output += f"**Date**: {date_str}\n\n"
35
  markdown_output += f"{desc}\n\n---\n"
36
  return markdown_output
37
 
38
-
39
-
40
- # Auto-query construction and retrieval
41
  def process_query_csv(query_file):
42
  global results_dict
43
  results_dict = {}
44
 
 
 
45
  query_df = pd.read_csv(query_file.name)
46
 
47
  for idx, row in query_df.iterrows():
48
- topic = row["Topic"]
49
- subtopic = row["Subtopic"]
50
- top_n = int(row["TopN"])
51
  query = f"Top {top_n} articles about {subtopic} in {topic}"
52
 
53
  query_embedding = model.encode([query], convert_to_tensor=False)
@@ -59,9 +61,12 @@ def process_query_csv(query_file):
59
  label = f"{topic} - {subtopic} (Top {top_n})"
60
  results_dict[label] = format_markdown(top_results)
61
 
62
- return list(results_dict.keys()), results_dict[list(results_dict.keys())[0]] if results_dict else "No results."
 
 
 
 
63
 
64
- # Show markdown for selected query
65
  def display_result(selected_query):
66
  return results_dict.get(selected_query, "No results found.")
67
 
@@ -70,13 +75,12 @@ with gr.Blocks() as demo:
70
  gr.Markdown("## 📄 Batch Query Article Retriever with Clickable Links")
71
 
72
  query_input = gr.File(label="Upload Query CSV (Topic, Subtopic, TopN)")
73
-
74
  run_btn = gr.Button("Run Retrieval")
75
-
76
  dropdown = gr.Dropdown(label="Select Query")
77
  output_md = gr.Markdown()
78
 
79
  run_btn.click(fn=process_query_csv, inputs=query_input, outputs=[dropdown, output_md])
80
  dropdown.change(fn=display_result, inputs=dropdown, outputs=output_md)
81
 
82
- demo.launch()
 
 
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
 
7
+ # Global variables
8
+ df = None
9
+ text_embeddings = None
10
+ model = None
 
 
 
 
 
 
 
 
 
 
11
  results_dict = {}
12
 
13
+ # Lazy load model and data
14
+ def load_resources():
15
+ global df, text_embeddings, model
16
+ if df is None:
17
+ df = pd.read_csv("analyticsvidhyacomplete.csv", parse_dates=["Date"])
18
+ df["Date"] = pd.to_datetime(df["Date"], format='mixed', dayfirst=True, errors='coerce')
19
+ df["combined_text"] = (
20
+ df["Title"].astype(str) + " " +
21
+ df["Description"].astype(str) + " " +
22
+ df["Content"].astype(str)
23
+ )
24
+ if model is None:
25
+ model = SentenceTransformer("all-MiniLM-L6-v2")
26
+ if text_embeddings is None:
27
+ text_embeddings = model.encode(df["combined_text"].tolist(), convert_to_tensor=False)
28
 
29
  def format_markdown(top_results):
30
  markdown_output = ""
 
33
  link = row['Link']
34
  desc = row['Description']
35
  date_str = row['Date'].strftime('%Y-%m-%d') if pd.notnull(row['Date']) else 'N/A'
 
36
  markdown_output += f"### [{title}]({link})\n"
37
  markdown_output += f"**Date**: {date_str}\n\n"
38
  markdown_output += f"{desc}\n\n---\n"
39
  return markdown_output
40
 
 
 
 
41
  def process_query_csv(query_file):
42
  global results_dict
43
  results_dict = {}
44
 
45
+ load_resources() # Ensure model/data is loaded
46
+
47
  query_df = pd.read_csv(query_file.name)
48
 
49
  for idx, row in query_df.iterrows():
50
+ topic = row.get("Topic", "")
51
+ subtopic = row.get("Subtopic", "")
52
+ top_n = int(row.get("TopN", 5))
53
  query = f"Top {top_n} articles about {subtopic} in {topic}"
54
 
55
  query_embedding = model.encode([query], convert_to_tensor=False)
 
61
  label = f"{topic} - {subtopic} (Top {top_n})"
62
  results_dict[label] = format_markdown(top_results)
63
 
64
+ if results_dict:
65
+ first_key = list(results_dict.keys())[0]
66
+ return list(results_dict.keys()), results_dict[first_key]
67
+ else:
68
+ return [], "No results."
69
 
 
70
  def display_result(selected_query):
71
  return results_dict.get(selected_query, "No results found.")
72
 
 
75
  gr.Markdown("## 📄 Batch Query Article Retriever with Clickable Links")
76
 
77
  query_input = gr.File(label="Upload Query CSV (Topic, Subtopic, TopN)")
 
78
  run_btn = gr.Button("Run Retrieval")
 
79
  dropdown = gr.Dropdown(label="Select Query")
80
  output_md = gr.Markdown()
81
 
82
  run_btn.click(fn=process_query_csv, inputs=query_input, outputs=[dropdown, output_md])
83
  dropdown.change(fn=display_result, inputs=dropdown, outputs=output_md)
84
 
85
+ if __name__ == "__main__":
86
+ demo.launch()