# QuantGrantsList / test.py
# rairo's picture
# Create test.py
# 2946558 verified
import streamlit as st
import pandas as pd
import base64
import json
from scrapegraphai.graphs import SmartScraperGraph
import nest_asyncio
import os
import subprocess
import io
# Ensure Playwright installs required browsers and dependencies.
# Streamlit re-executes this whole script on every widget interaction, so the
# install is guarded by a process-wide marker: it runs until it has succeeded
# once in this server process and is skipped on later reruns.
_PLAYWRIGHT_READY_FLAG = "QUANTGRANTS_PLAYWRIGHT_INSTALLED"
if not os.environ.get(_PLAYWRIGHT_READY_FLAG):
    # Best effort, as before: a non-zero exit status is not fatal here; the
    # marker simply stays unset so the install is retried on the next run.
    _playwright_install = subprocess.run(["playwright", "install"])
    if _playwright_install.returncode == 0:
        os.environ[_PLAYWRIGHT_READY_FLAG] = "1"
# System-level dependency installation is left disabled:
#subprocess.run(["playwright", "install-deps"])

# Patch asyncio so nested event loops are allowed.
# NOTE(review): presumably needed because the scraper runs async code while
# an event loop is already running -- confirm.
nest_asyncio.apply()

# Required configuration: raises KeyError at start-up when the variable is unset.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

# Configuration handed to SmartScraperGraph in get_data().
graph_config = {
    "llm": {
        "api_key": GOOGLE_API_KEY,
        "model": "google_genai/gemini-pro",
    },
}
def get_data(url):
    """Scrape grant information from a web page with scrapegraphai.

    Builds a SmartScraperGraph for ``url`` using the module-level
    ``graph_config`` and a fixed extraction prompt, runs it, and hands back
    the outcome unchanged.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``SmartScraperGraph.run()`` produces. The rest of this file
        expects a dict with a 'grants' entry holding one dict per grant;
        that shape is not checked or enforced here.
    """
    extraction_prompt = (
        "List me all grants or funds,short summary of grant description,"
        "the organisations funding them, The value of the grant as an integer, "
        "the due date, eligible countries and eligibility criteria for applicants."
    )
    scraper = SmartScraperGraph(
        prompt=extraction_prompt,
        source=url,
        config=graph_config,
    )
    return scraper.run()
def convert_to_csv(data):
    """Serialise the scraped grants to UTF-8 encoded CSV bytes.

    Args:
        data: Scraper result, normally a dict whose 'grants' entry is a list
            of per-grant dicts. A missing or empty 'grants' entry, a non-dict
            ``data`` (for example ``None``) and a single grant given as one
            dict are all tolerated instead of raising.

    Returns:
        bytes: CSV text (header row plus one row per grant, without the
        DataFrame index column) encoded as UTF-8.
    """
    grants = data.get('grants') if isinstance(data, dict) else None
    if isinstance(grants, dict):
        # A lone grant returned as a mapping becomes a one-row table;
        # pd.DataFrame would otherwise reject a dict of scalar values.
        grants = [grants]
    df = pd.DataFrame(grants or [])
    return df.to_csv(index=False).encode('utf-8')
def convert_to_excel(data):
    """Serialise the scraped grants to an in-memory .xlsx workbook.

    Args:
        data: Scraper result, normally a dict whose 'grants' entry is a list
            of per-grant dicts. A missing or empty 'grants' entry, a non-dict
            ``data`` (for example ``None``) and a single grant given as one
            dict are all tolerated instead of raising.

    Returns:
        bytes: Content of a workbook with a single 'Grants' sheet (no index
        column), written with the xlsxwriter engine.
    """
    grants = data.get('grants') if isinstance(data, dict) else None
    if isinstance(grants, dict):
        # A lone grant returned as a mapping becomes a one-row table;
        # pd.DataFrame would otherwise reject a dict of scalar values.
        grants = [grants]
    df = pd.DataFrame(grants or [])
    buffer = io.BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
        df.to_excel(writer, sheet_name='Grants', index=False)
    # Read the buffer only after the writer has been closed, so the workbook
    # is complete.
    return buffer.getvalue()
# Page header and the URL entry field.
st.title("Quantilytix Grant Scraper")
url = st.text_input("Enter URL")

# Seed the session slot on the first run so later reads never hit a missing
# attribute.
if "scraped_data" not in st.session_state:
    st.session_state.scraped_data = None

# Scrape only when the button was clicked on this run.
if st.button("Get grants"):
    if not url:
        st.warning("Please enter a URL.")
    else:
        try:
            with st.spinner("Retrieving Grants, Please Wait...."):
                scraped = get_data(url)
                # Keep the result in session state so it is still available
                # on runs where the button was not clicked.
                st.session_state.scraped_data = scraped
                st.success("Data scraped successfully!")
        except Exception as exc:
            # Top-level UI boundary: report any scraping failure on the page.
            st.error(f"Error scraping data: {exc}")
# Download + preview section. Rendered on every run while a scrape result is
# held in session state, not only on the run where the button was clicked.
if st.session_state.scraped_data:
    result = st.session_state.scraped_data  # Access the saved result
    grants = result.get('grants') if isinstance(result, dict) else None
    if not grants:
        # The stored result has no usable 'grants' entry; tell the user
        # instead of failing with a KeyError while rendering the page.
        st.warning("No grants were found in the scraped data.")
    else:
        # Format label -> (converter, MIME type, file name, link text).
        # The CSV link is advertised as text/csv rather than as an Excel
        # document.
        download_formats = {
            "CSV": (
                convert_to_csv,
                "text/csv",
                "grants.csv",
                "Download CSV",
            ),
            "Excel": (
                convert_to_excel,
                "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                "grants.xlsx",
                "Download Excel",
            ),
        }
        selected_format = st.selectbox("Select Download Format", tuple(download_formats))
        converter, mime_type, file_name, link_text = download_formats[selected_format]

        # The file is embedded in the page as a base64 data URI link.
        b64 = base64.b64encode(converter(result)).decode()
        download_link = (
            f"<a href='data:{mime_type};base64,{b64}' "
            f"download='{file_name}'>{link_text}</a>"
        )
        st.markdown(download_link, unsafe_allow_html=True)
        st.dataframe(grants)