rairo committed on
Commit
ccfef3f
·
verified ·
1 Parent(s): 7d22bc0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -27
app.py CHANGED
@@ -8,19 +8,22 @@ import os
8
  import subprocess
9
  import io
10
  import time
 
11
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
12
  from langchain.vectorstores import FAISS
13
  from langchain.text_splitter import CharacterTextSplitter
14
  from langchain.chains import ConversationalRetrievalChain
15
  from langchain.memory import ConversationBufferMemory
16
- import urllib.parse
 
 
 
 
17
 
18
  # Ensure Playwright installs required browsers and dependencies
19
  subprocess.run(["playwright", "install"])
20
  nest_asyncio.apply()
21
 
22
-
23
-
24
  GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
25
 
26
  graph_config = {
@@ -31,7 +34,6 @@ graph_config = {
31
  "max_results": 8,
32
  "verbose": True,
33
  "headless": True
34
-
35
  }
36
 
37
  def get_data(search_term):
@@ -60,7 +62,7 @@ def get_data(search_term):
60
  )
61
  result = search_graph.run()
62
  if not result or not result.get("grants"):
63
- st.error(f"No results returned for {search_term}. Please try again with a different search_term.")
64
  return {}
65
  return result
66
  except Exception as e:
@@ -82,8 +84,47 @@ def get_data(search_term):
82
  st.error(f"Retry failed: {e2}. Please try again later.")
83
  return {}
84
  else:
85
- st.error(f"An error occurred for search term : {search_term}, error: {e}. Please try again.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  return {}
 
 
 
 
87
 
88
  def process_multiple_search_terms(search_terms):
89
  """
@@ -182,27 +223,52 @@ def main():
182
  if "chat_interface_active" not in st.session_state:
183
  st.session_state.chat_interface_active = False
184
 
185
- # Sidebar: Search Term Input
186
- search_input = st.sidebar.text_area(
187
- "Enter Search Terms (one per line). maximum 2",
188
- height=150,
189
- help="Input search terms to discover grant opportunities. Terms can be specific or generic.",
190
- placeholder="e.g.,\nRenewable energy \nclimate change research\nAgriculture in Africa"
191
  )
192
 
193
- if st.sidebar.button("🔍 Get Grant Opportunities"):
194
- if search_input:
195
- search_terms = [term.strip() for term in search_input.split("\n") if term.strip()]
196
- if search_terms:
197
- with st.spinner("Searching in progress... Please wait patiently."):
198
- result = process_multiple_search_terms(search_terms)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
  st.session_state.scraped_data = result
200
  if result.get("grants"):
201
- st.sidebar.success(f"✅ Found {len(result['grants'])} grant opportunities from {len(search_terms)} search terms!")
202
  else:
203
- st.sidebar.warning("⚠️ Please enter valid search terms.")
204
- else:
205
- st.sidebar.warning("⚠️ Please enter at least one search term to begin.")
206
 
207
  # Sidebar: Download & Share Controls
208
  if st.session_state.scraped_data and st.session_state.scraped_data.get('grants'):
@@ -254,18 +320,15 @@ def main():
254
 
255
  if st.session_state.chat_history:
256
  st.subheader("Chat History")
257
- # Reverse the chat history to show the latest messages first
258
  for chat in reversed(st.session_state.chat_history):
259
- # User message: dark grey background with white text
260
  st.markdown(
261
  f"<div style='padding: 10px; border-radius: 5px; margin-bottom: 5px; background-color:#444444; color: white;'><strong>You:</strong> {chat['query']}</div>",
262
  unsafe_allow_html=True)
263
- # Bot message: blue background with white text
264
  st.markdown(
265
  f"<div style='padding: 10px; border-radius: 5px; margin-bottom: 10px; background-color:#007BFF; color: white;'><strong>Grants Bot:</strong> {chat['response']}</div>",
266
  unsafe_allow_html=True)
267
  else:
268
- st.info("⬅️ Enter search terms in the sidebar and click 'Get Grant Opportunities' to start searching.")
269
 
270
  st.sidebar.markdown("---")
271
  st.sidebar.markdown(
@@ -278,4 +341,4 @@ def main():
278
  )
279
 
280
  if __name__ == "__main__":
281
- main()
 
8
  import subprocess
9
  import io
10
  import time
11
+ import urllib.parse
12
  from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
13
  from langchain.vectorstores import FAISS
14
  from langchain.text_splitter import CharacterTextSplitter
15
  from langchain.chains import ConversationalRetrievalChain
16
  from langchain.memory import ConversationBufferMemory
17
+
18
+ # Import Supadata and initialize the client
19
+ from supadata import Supadata, SupadataError
20
+ SUPADATA_API_KEY = os.getenv("SUPADATA")
21
+ supadata = Supadata(api_key=SUPADATA_API_KEY)
22
 
23
  # Ensure Playwright installs required browsers and dependencies
24
  subprocess.run(["playwright", "install"])
25
  nest_asyncio.apply()
26
 
 
 
27
  GOOGLE_API_KEY = os.environ["GOOGLE_API_KEY"]
28
 
29
  graph_config = {
 
34
  "max_results": 8,
35
  "verbose": True,
36
  "headless": True
 
37
  }
38
 
39
  def get_data(search_term):
 
62
  )
63
  result = search_graph.run()
64
  if not result or not result.get("grants"):
65
+ st.error(f"No results returned for {search_term}. Please try again with a different search term.")
66
  return {}
67
  return result
68
  except Exception as e:
 
84
  st.error(f"Retry failed: {e2}. Please try again later.")
85
  return {}
86
  else:
87
+ st.error(f"An error occurred for search term: {search_term}, error: {e}. Please try again.")
88
+ return {}
89
+
90
+ def get_data_from_url(url):
91
+ """
92
+ Scrape the provided URL using Supadata and pass the page content directly to the Gemini model
93
+ (using ChatGoogleGenerativeAI) to extract grant data in a JSON structure.
94
+ """
95
+ try:
96
+ web_content = supadata.web.scrape(url)
97
+ page_content = web_content.content
98
+ full_prompt = (
99
+ "Extract the following grant data from the provided web content. "
100
+ "List me all grants or funds with:\n"
101
+ "- Grant name/title\n"
102
+ "- Short summary\n"
103
+ "- Funding organization\n"
104
+ "- Grant value (numeric only)\n"
105
+ "- Application deadline\n"
106
+ "- Eligible countries\n"
107
+ "- Sector/field\n"
108
+ "- Eligibility criteria\n"
109
+ "Return in JSON format.\n\n"
110
+ f"Web content: {page_content}"
111
+ )
112
+ llm = ChatGoogleGenerativeAI(
113
+ model="gemini-2.0-flash-thinking-exp", google_api_key=GOOGLE_API_KEY, temperature=0
114
+ )
115
+ response = llm.invoke(full_prompt)
116
+ try:
117
+ result = json.loads(response.content)
118
+ except Exception as parse_error:
119
+ st.error("Error parsing JSON from Gemini model response.")
120
+ return {}
121
+ if not result or not result.get("grants"):
122
+ st.error("No grant opportunities found in the scraped URL.")
123
  return {}
124
+ return result
125
+ except Exception as e:
126
+ st.error(f"An error occurred while scraping URL {url}: {e}")
127
+ return {}
128
 
129
  def process_multiple_search_terms(search_terms):
130
  """
 
223
  if "chat_interface_active" not in st.session_state:
224
  st.session_state.chat_interface_active = False
225
 
226
+ # Sidebar: Input Type Selection
227
+ input_type = st.sidebar.radio(
228
+ "Select Input Type:",
229
+ ("Search Query", "URL"),
230
+ key="input_type_selector"
 
231
  )
232
 
233
+ # Sidebar: Input field based on selection
234
+ if input_type == "Search Query":
235
+ search_input = st.sidebar.text_area(
236
+ "Enter Search Terms (one per line). Maximum 2",
237
+ height=150,
238
+ help="Input search terms to discover grant opportunities. Terms can be specific or generic.",
239
+ placeholder="e.g.,\nRenewable energy \nclimate change research\nAgriculture in Africa"
240
+ )
241
+ else:
242
+ url_input = st.sidebar.text_input(
243
+ "Enter URL to scrape for grant opportunities",
244
+ placeholder="https://example.com/grants"
245
+ )
246
+
247
+ # Execute based on input type selection
248
+ if input_type == "Search Query":
249
+ if st.sidebar.button("🔍 Get Grant Opportunities"):
250
+ if search_input:
251
+ search_terms = [term.strip() for term in search_input.split("\n") if term.strip()]
252
+ if search_terms:
253
+ with st.spinner("Searching in progress... Please wait patiently."):
254
+ result = process_multiple_search_terms(search_terms)
255
+ st.session_state.scraped_data = result
256
+ if result.get("grants"):
257
+ st.sidebar.success(f"✅ Found {len(result['grants'])} grant opportunities from {len(search_terms)} search terms!")
258
+ else:
259
+ st.sidebar.warning("⚠️ Please enter valid search terms.")
260
+ else:
261
+ st.sidebar.warning("⚠️ Please enter at least one search term to begin.")
262
+ else: # URL input
263
+ if st.sidebar.button("🔍 Scrape URL for Grant Opportunities"):
264
+ if url_input:
265
+ with st.spinner("Scraping URL... Please wait patiently."):
266
+ result = get_data_from_url(url_input)
267
  st.session_state.scraped_data = result
268
  if result.get("grants"):
269
+ st.sidebar.success(f"✅ Found {len(result['grants'])} grant opportunities from the URL!")
270
  else:
271
+ st.sidebar.warning("⚠️ Please enter a valid URL to scrape.")
 
 
272
 
273
  # Sidebar: Download & Share Controls
274
  if st.session_state.scraped_data and st.session_state.scraped_data.get('grants'):
 
320
 
321
  if st.session_state.chat_history:
322
  st.subheader("Chat History")
 
323
  for chat in reversed(st.session_state.chat_history):
 
324
  st.markdown(
325
  f"<div style='padding: 10px; border-radius: 5px; margin-bottom: 5px; background-color:#444444; color: white;'><strong>You:</strong> {chat['query']}</div>",
326
  unsafe_allow_html=True)
 
327
  st.markdown(
328
  f"<div style='padding: 10px; border-radius: 5px; margin-bottom: 10px; background-color:#007BFF; color: white;'><strong>Grants Bot:</strong> {chat['response']}</div>",
329
  unsafe_allow_html=True)
330
  else:
331
+ st.info("⬅️ Enter search terms or a URL in the sidebar and click the appropriate button to start searching.")
332
 
333
  st.sidebar.markdown("---")
334
  st.sidebar.markdown(
 
341
  )
342
 
343
  if __name__ == "__main__":
344
+ main()