rairo committed on
Commit
e4fc921
·
verified ·
1 Parent(s): 65acf1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +87 -51
app.py CHANGED
@@ -89,68 +89,104 @@ def get_data(search_term):
89
  st.error(f"An error occurred for search term: {search_term}, error: {e}. Please try again.")
90
  return {}
91
 
 
 
 
 
 
 
 
 
 
92
  def get_data_from_url(url):
93
  """
94
- Scrape the provided URL using Supadata and pass the page content directly to the Gemini model
95
- (using raw Google model) to extract grant data in a JSON structure.
96
  """
 
 
 
97
  try:
98
- # Try using the supadata.web.scrape method
 
 
 
 
 
 
 
 
 
99
  try:
100
- web_content = supadata.web.scrape(url)
101
- except TypeError as te:
102
- # Fallback if an unexpected keyword argument 'type' is raised
103
- if "unexpected keyword argument 'type'" in str(te):
104
- st.warning("Falling back due to unexpected keyword argument 'type'.")
105
- if hasattr(supadata, "scrape"):
106
- web_content = supadata.scrape(url)
107
- else:
108
- st.error("Fallback method not available: supadata does not have 'scrape'.")
109
- return {}
110
  else:
111
- raise te
 
 
112
 
113
- page_content = web_content.content
114
- full_prompt = (
115
- "Extract the following grant data from the provided web content. "
116
- "- Grant name/title\n"
117
- "- Short summary\n"
118
- "- Funding organization\n"
119
- "- Grant value (numeric only)\n"
120
- "- Application deadline\n"
121
- "- Eligible countries\n"
122
- "- Sector/field\n"
123
- "- Eligibility criteria\n"
124
- "Return in JSON format.\n\n"
125
- f"Web content: {page_content}"
126
- )
127
- client = genai.Client(api_key=GOOGLE_API_KEY)
128
- new_answer = client.models.generate_content(
129
- model="models/gemini-2.0-flash-lite",
130
- contents=f"{full_prompt}, return the json string and nothing else"
131
- )
132
- response = new_answer.text
133
  try:
134
- # Extract the JSON string from the response
135
- start_index = response.find('[')
136
- end_index = response.rfind(']') + 1
137
- json_string = response[start_index:end_index]
138
- result = json.loads(json_string)
139
- except Exception as parse_error:
140
- st.error(f"Error parsing JSON from Gemini model response. Here is the response: {response}")
141
- return {}
142
- # If result is a list, wrap it in a dictionary with the key "grants"
143
- if isinstance(result, list):
144
- result = {"grants": result}
145
- if not result.get("grants"):
146
- st.error("No grant opportunities found in the scraped URL.")
147
  return {}
148
- st.success(f"First grant opportunity: {result['grants'][0]}")
149
- return result
150
- except Exception as e:
151
- st.error(f"An error occurred while scraping URL {url}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  return {}
153
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
 
156
 
 
89
  st.error(f"An error occurred for search term: {search_term}, error: {e}. Please try again.")
90
  return {}
91
 
92
+
93
+ import requests
94
+ import json
95
+ import streamlit as st
96
+ import google.generativeai as genai
97
+
98
+ SUPADATA_API_KEY = "your_supadata_api_key" # Replace with actual key
99
+ GOOGLE_API_KEY = "your_google_api_key" # Replace with actual key
100
+
101
  def get_data_from_url(url):
102
  """
103
+ Scrape the provided URL using Supadata. If it fails, fall back to the Supadata API,
104
+ and if that fails, fall back to a direct request. Extract grant data using Gemini AI.
105
  """
106
+ page_content = None # Placeholder for storing scraped page content
107
+
108
+ # **Step 1: Attempt Supadata's Built-in Scraper**
109
  try:
110
+ web_content = supadata.web.scrape(url)
111
+ page_content = web_content.content
112
+ except TypeError as te:
113
+ if "unexpected keyword argument 'type'" in str(te):
114
+ st.warning("Falling back to Supadata API due to unexpected keyword 'type' error.")
115
+ else:
116
+ st.error(f"Unexpected error in Supadata scrape: {te}")
117
+
118
+ # **Step 2: If Supadata's Built-in Scraper Fails, Use Supadata API**
119
+ if not page_content:
120
  try:
121
+ api_url = "https://api.supadata.ai/v1/web/scrape"
122
+ headers = {"X-API-Key": SUPADATA_API_KEY}
123
+ response = requests.get(api_url, headers=headers, params={"url": url})
124
+
125
+ if response.status_code == 200:
126
+ page_content = response.json().get("content", "")
 
 
 
 
127
  else:
128
+ st.error(f"Supadata API failed with status {response.status_code}")
129
+ except Exception as e:
130
+ st.error(f"Error calling Supadata API: {e}")
131
 
132
+ # **Step 3: If Supadata API Fails, Use Direct Web Request**
133
+ if not page_content:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  try:
135
+ r = requests.get(url, timeout=10)
136
+ if r.status_code == 200:
137
+ page_content = r.text
138
+ else:
139
+ st.error(f"Manual scraping failed with status code {r.status_code}")
140
+ return {}
141
+ except Exception as e:
142
+ st.error(f"Manual scraping error: {e}")
 
 
 
 
 
143
  return {}
144
+
145
+ # **Pass Content to Gemini AI**
146
+ full_prompt = (
147
+ "Extract the following grant data from the provided web content. "
148
+ "- Grant name/title\n"
149
+ "- Short summary\n"
150
+ "- Funding organization\n"
151
+ "- Grant value (numeric only)\n"
152
+ "- Application deadline\n"
153
+ "- Eligible countries\n"
154
+ "- Sector/field\n"
155
+ "- Eligibility criteria\n"
156
+ "Return in JSON format.\n\n"
157
+ f"Web content: {page_content}"
158
+ )
159
+
160
+ client = genai.Client(api_key=GOOGLE_API_KEY)
161
+ new_answer = client.models.generate_content(
162
+ model="models/gemini-2.0-flash-lite",
163
+ contents=f"{full_prompt}, return the json string and nothing else"
164
+ )
165
+
166
+ response = new_answer.text
167
+
168
+ # **Extract JSON Output from Gemini**
169
+ try:
170
+ start_index = response.find('[')
171
+ end_index = response.rfind(']') + 1
172
+ json_string = response[start_index:end_index]
173
+ result = json.loads(json_string)
174
+ except Exception as parse_error:
175
+ st.error(f"Error parsing JSON from Gemini model response. Response: {response}")
176
  return {}
177
 
178
+ # **Ensure JSON is Wrapped Correctly**
179
+ if isinstance(result, list):
180
+ result = {"grants": result}
181
+
182
+ if not result.get("grants"):
183
+ st.error("No grant opportunities found in the scraped URL.")
184
+ return {}
185
+
186
+ st.success(f"First grant opportunity: {result['grants'][0]}")
187
+ return result
188
+
189
+
190
 
191
 
192