Spaces:

rairo
/

QuantGrantsList

Sleeping

App Files Files Community

QuantGrantsList / test.py

rairo

Create test.py

2946558 verified about 1 year ago

raw

history blame contribute delete

3.11 kB

	import streamlit as st
	import pandas as pd
	import base64
	import json
	from scrapegraphai.graphs import SmartScraperGraph
	import nest_asyncio
	import os
	import subprocess
	import io

	# Streamlit re-executes this whole script on every widget interaction, so the
	# Playwright browser download is wrapped in a cached helper: the command is
	# spawned once per server process instead of once per rerun.
	@st.cache_resource(show_spinner=False)
	def _install_playwright_browsers():
	    """Run ``playwright install`` once and return the command's exit code.

	    The install stays best-effort: a non-zero exit code is returned, not
	    raised, so a failed download does not by itself stop the app. Because
	    the result is cached, the command is not retried until the server
	    process restarts.

	    Returns:
	        The integer exit status of the ``playwright install`` command.
	    """
	    completed = subprocess.run(["playwright", "install"], check=False)
	    return completed.returncode


	_install_playwright_browsers()
	# `playwright install-deps` is not run here; enable it only if the host is
	# missing the system libraries the browsers need.

	# Patch asyncio so that an event loop can be started from inside a running
	# one (presumably needed because the scraper is async - confirm).
	nest_asyncio.apply()


	# Fail fast: a missing key raises KeyError at startup rather than at scrape time.
	GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

	# Configuration handed to SmartScraperGraph in get_data().
	graph_config = {
	    "llm": {
	        "api_key": GOOGLE_API_KEY,
	        "model": "google_genai/gemini-pro",
	    },
	}

	def get_data(url):
	    """Scrape grant listings from a web page with scrapegraphai.

	    A ``SmartScraperGraph`` is built from the module-level ``graph_config``
	    and a fixed prompt that asks for every grant or fund together with a
	    short description, the funding organisations, the value, the due date,
	    the eligible countries and the eligibility criteria.

	    Args:
	        url: The URL to scrape; passed to the graph as its ``source``.

	    Returns:
	        Whatever ``SmartScraperGraph.run()`` produces. The rest of this
	        script indexes the result with ``'grants'`` and treats that entry
	        as a list of per-grant records, but the shape is determined by the
	        model's answer and is not enforced here.
	    """
	    extraction_prompt = "List me all grants or funds,short summary of grant description,the organisations funding them, The value of the grant as an integer, the due date, eligible countries and eligibility criteria for applicants."

	    scraper = SmartScraperGraph(
	        prompt=extraction_prompt,
	        source=url,
	        config=graph_config,
	    )
	    return scraper.run()

	def convert_to_csv(data):
	    """Serialise the scraped grants as UTF-8 encoded CSV bytes.

	    Args:
	        data: Mapping whose ``'grants'`` entry can be turned into a
	            ``pandas.DataFrame`` (for example a list of per-grant dicts).

	    Returns:
	        The CSV text, written without the index column, as UTF-8 bytes.
	    """
	    grants_frame = pd.DataFrame(data['grants'])
	    csv_text = grants_frame.to_csv(index=False)
	    return csv_text.encode('utf-8')

	def convert_to_excel(data):
	    """Render the scraped grants as an in-memory ``.xlsx`` workbook.

	    Args:
	        data: Mapping whose ``'grants'`` entry can be turned into a
	            ``pandas.DataFrame`` (for example a list of per-grant dicts).

	    Returns:
	        The bytes of a workbook with a single sheet named ``Grants``,
	        written without the index column using the ``xlsxwriter`` engine.
	    """
	    workbook_bytes = io.BytesIO()
	    grants_frame = pd.DataFrame(data['grants'])
	    with pd.ExcelWriter(workbook_bytes, engine='xlsxwriter') as excel_writer:
	        grants_frame.to_excel(excel_writer, sheet_name='Grants', index=False)
	    # Read the buffer only after the writer has been closed by the with-block.
	    return workbook_bytes.getvalue()

	st.title("Quantilytix Grant Scraper")

	url = st.text_input("Enter URL")

	# Streamlit reruns the script on every interaction; the last stored scrape
	# result is kept in session state so it survives those reruns.
	if "scraped_data" not in st.session_state:
	    st.session_state.scraped_data = None

	if st.button("Get grants"):
	    if not url:
	        st.warning("Please enter a URL.")
	    else:
	        try:
	            # Only the scrape itself is guarded, so unrelated errors are not
	            # reported as scraping failures.
	            with st.spinner("Retrieving Grants, Please Wait...."):
	                result = get_data(url)
	        except Exception as e:
	            # Top-level UI boundary: show the failure instead of a traceback.
	            st.error(f"Error scraping data: {e}")
	        else:
	            # The rendering code below indexes result['grants']; storing a
	            # result without that key would raise KeyError on every rerun.
	            if isinstance(result, dict) and "grants" in result:
	                st.session_state.scraped_data = result  # Store result in session state
	                st.success("Data scraped successfully!")
	            else:
	                st.error("Error scraping data: the scraper response has no 'grants' entry.")

	# Shown only once a scrape result has been stored in session state.
	if st.session_state.scraped_data:
	    selected_format = st.selectbox("Select Download Format", ("CSV", "Excel"))

	    result = st.session_state.scraped_data  # Access the saved result

	    # st.download_button serves the bytes directly, replacing the hand-built
	    # base64 data-URI anchors injected with unsafe_allow_html. The CSV file
	    # is now labelled text/csv instead of the Excel MIME type.
	    if selected_format == "CSV":
	        st.download_button(
	            label="Download CSV",
	            data=convert_to_csv(result),
	            file_name="grants.csv",
	            mime="text/csv",
	        )
	    elif selected_format == "Excel":
	        st.download_button(
	            label="Download Excel",
	            data=convert_to_excel(result),
	            file_name="grants.xlsx",
	            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
	        )

	    # Preview of the same records that the download contains.
	    st.dataframe(result['grants'])