Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import base64 | |
| import json | |
| from scrapegraphai.graphs import SmartScraperGraph | |
| import nest_asyncio | |
| import os | |
| import subprocess | |
| import io | |
# Fetch the browser binaries Playwright needs before any scraping is attempted.
subprocess.run(["playwright", "install"])
# NOTE: the companion system-dependency step is currently disabled:
# subprocess.run(["playwright", "install-deps"])

# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the scraper runs async code inside
# the Streamlit process -- confirm.
nest_asyncio.apply()

# Fails fast with KeyError when the key is not configured in the environment.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

# Configuration handed to SmartScraperGraph: which LLM to use and its credentials.
graph_config = {
    "llm": {"api_key": GOOGLE_API_KEY, "model": "google_genai/gemini-pro"},
}
def get_data(url):
    """
    Scrape grant information from a web page using scrapegraphai.

    Args:
        url: The URL of the page to scrape.

    Returns:
        The value returned by ``SmartScraperGraph.run()`` for the prompt below.
        The rest of this script reads it as a dictionary of the form
        ``{'grants': [{...}, ...]}``.
        NOTE(review): the prompt does not name the ``'grants'`` key or the
        per-grant field names, so the exact schema depends on the model's
        output -- confirm against real results.
    """
    grants_prompt = "List me all grants or funds,short summary of grant description,the organisations funding them, The value of the grant as an integer, the due date, eligible countries and eligibility criteria for applicants."
    scraper = SmartScraperGraph(
        prompt=grants_prompt,
        source=url,
        config=graph_config,
    )
    return scraper.run()
def convert_to_csv(data):
    """
    Serialise the scraped grants as CSV.

    Args:
        data: Mapping whose ``'grants'`` entry is something ``pandas.DataFrame``
            accepts (for example a list of dictionaries, one per grant).

    Returns:
        The CSV document (header row, no index column) as UTF-8 encoded bytes.
    """
    grants_table = pd.DataFrame(data['grants'])
    csv_text = grants_table.to_csv(index=False)
    return csv_text.encode('utf-8')
def convert_to_excel(data):
    """
    Serialise the scraped grants as an Excel workbook held in memory.

    Args:
        data: Mapping whose ``'grants'`` entry is something ``pandas.DataFrame``
            accepts (for example a list of dictionaries, one per grant).

    Returns:
        The bytes of an .xlsx workbook with a single sheet named ``Grants``
        (no index column), written with the ``xlsxwriter`` engine.
    """
    grants_table = pd.DataFrame(data['grants'])
    workbook_bytes = io.BytesIO()
    with pd.ExcelWriter(workbook_bytes, engine='xlsxwriter') as xlsx_writer:
        grants_table.to_excel(xlsx_writer, sheet_name='Grants', index=False)
    # Read the buffer only after the writer has been closed, so the workbook
    # has been fully flushed into it.
    return workbook_bytes.getvalue()
# ---------------------------------------------------------------------------
# Streamlit page: ask for a URL, scrape it on demand, then offer the result
# as a CSV or Excel download and show it in a table.
# ---------------------------------------------------------------------------
st.title("Quantilytix Grant Scraper")
url = st.text_input("Enter URL")

# Streamlit re-executes this script on every widget interaction, so the last
# scrape is kept in session state to survive those reruns.
if "scraped_data" not in st.session_state:
    st.session_state.scraped_data = None

if st.button("Get grants"):
    if url:
        try:
            with st.spinner("Retrieving Grants, Please Wait...."):
                result = get_data(url)
            st.session_state.scraped_data = result  # Store result in session state
            st.success("Data scraped successfully!")
        except Exception as e:
            # UI boundary: report any scraper failure to the user instead of
            # crashing the page.
            st.error(f"Error scraping data: {e}")
    else:
        st.warning("Please enter a URL.")

result = st.session_state.scraped_data  # Access the saved result
if result:
    # The scraper's output comes from an LLM, so the 'grants' key is not
    # guaranteed. Guard it here: an unguarded KeyError would otherwise recur
    # on every rerun because the bad result stays in session state.
    grants = result.get('grants') if isinstance(result, dict) else None
    if not grants:
        st.warning("No grants were found in the scraped result.")
        st.write(result)
    else:
        selected_format = st.selectbox("Select Download Format", ("CSV", "Excel"))
        # st.download_button replaces the hand-built base64 data-URI links
        # (no unsafe HTML needed) and lets each format carry its correct
        # MIME type -- CSV was previously served as application/vnd.ms-excel.
        if selected_format == "CSV":
            st.download_button(
                label="Download CSV",
                data=convert_to_csv(result),
                file_name="grants.csv",
                mime="text/csv",
            )
        elif selected_format == "Excel":
            st.download_button(
                label="Download Excel",
                data=convert_to_excel(result),
                file_name="grants.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            )
        st.dataframe(grants)