rairo committed on
Commit
2946558
·
verified ·
1 Parent(s): 1a34d0c

Create test.py

Browse files
Files changed (1) hide show
  1. test.py +96 -0
test.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import base64
4
+ import json
5
+ from scrapegraphai.graphs import SmartScraperGraph
6
+ import nest_asyncio
7
+ import os
8
+ import subprocess
9
+ import io
10
+
11
# Ensure Playwright has the browsers scrapegraphai needs before any scrape
# runs.  Streamlit re-executes this whole script on every widget interaction,
# so the (slow) install is guarded to run only once per user session.
if not st.session_state.get("_playwright_installed", False):
    subprocess.run(["playwright", "install"], check=False)
    #subprocess.run(["playwright", "install-deps"])
    st.session_state["_playwright_installed"] = True

# Streamlit already runs inside an asyncio event loop; nest_asyncio patches
# asyncio so scrapegraphai's internal loop usage can nest within it.
nest_asyncio.apply()

# Fail fast (KeyError) if the key is not configured in the environment —
# there is no point rendering the UI without LLM credentials.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

# LLM configuration handed to every SmartScraperGraph instance.
graph_config = {
    "llm": {
        "api_key": GOOGLE_API_KEY,
        "model": "google_genai/gemini-pro",
    },
}
26
+
27
def get_data(url):
    """
    Scrape grant/funding information from *url* using scrapegraphai.

    Args:
        url: The URL to scrape.

    Returns:
        A dictionary containing the extracted data in the following format:
        {'grants': [{'grant_name': ..., 'funding_organisation': ...,
                     'due_date': ..., 'eligible_countries': ...,
                     'eligibility_conditions': ...}, ...]}
    """
    # The prompt spells out every field the downstream table/export expects.
    scrape_prompt = (
        "List me all grants or funds,short summary of grant description,"
        "the organisations funding them, The value of the grant as an "
        "integer, the due date, eligible countries and eligibility "
        "criteria for applicants."
    )
    scraper = SmartScraperGraph(
        prompt=scrape_prompt,
        source=url,
        config=graph_config,
    )
    return scraper.run()
49
+
50
def convert_to_csv(data):
    """Render data['grants'] as a CSV table (no index column), UTF-8 encoded bytes."""
    return pd.DataFrame(data['grants']).to_csv(index=False).encode('utf-8')
53
+
54
def convert_to_excel(data):
    """Render data['grants'] as an .xlsx workbook (single sheet 'Grants'); return raw bytes."""
    output = io.BytesIO()
    writer = pd.ExcelWriter(output, engine='xlsxwriter')
    try:
        frame = pd.DataFrame(data['grants'])
        frame.to_excel(writer, sheet_name='Grants', index=False)
    finally:
        # Equivalent to the context-manager exit: flush the workbook into the buffer.
        writer.close()
    return output.getvalue()
60
+
61
# --- Streamlit UI ---------------------------------------------------------
# NOTE(review): Streamlit re-runs this whole script on every interaction, so
# the scraped result is cached in st.session_state to survive reruns (e.g.
# when the user changes the download-format selectbox).

st.title("Quantilytix Grant Scraper")

url = st.text_input("Enter URL")

# Initialise the session slot exactly once per session.
if "scraped_data" not in st.session_state:
    st.session_state.scraped_data = None

if st.button("Get grants"):
    if url:
        try:
            with st.spinner("Retrieving Grants, Please Wait...."):
                result = get_data(url)
                st.session_state.scraped_data = result  # Store result in session state
            st.success("Data scraped successfully!")
        except Exception as e:
            # Broad catch is deliberate here: any scraper/LLM failure is
            # surfaced to the user rather than crashing the app.
            st.error(f"Error scraping data: {e}")
    else:
        st.warning("Please enter a URL.")

# Render the download controls only once a scrape has succeeded.
if st.session_state.scraped_data:
    selected_format = st.selectbox("Select Download Format", ("CSV", "Excel"))

    result = st.session_state.scraped_data  # Access the saved result

    # Downloads are delivered as base64 data-URI anchor tags rendered via
    # st.markdown with unsafe_allow_html.
    if selected_format == "CSV":
        csv_data = convert_to_csv(result)
        b64 = base64.b64encode(csv_data).decode()
        download_link = f"<a href='data:application/vnd.ms-excel;base64,{b64}' download='grants.csv'>Download CSV</a>"
        st.markdown(download_link, unsafe_allow_html=True)
    elif selected_format == "Excel":
        excel_data = convert_to_excel(result)
        b64 = base64.b64encode(excel_data).decode()
        download_link = f"<a href='data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}' download='grants.xlsx'>Download Excel</a>"
        st.markdown(download_link, unsafe_allow_html=True)

    # Show the scraped grants as an interactive table.
    # assumes result['grants'] is a list of flat dicts — TODO confirm against
    # what SmartScraperGraph actually returns for this prompt.
    st.dataframe(result['grants'])