Craw4ai-example

Sleeping

App Files Files Community

rairo committed on Jan 18, 2025

Commit

25996b1

verified ·

1 Parent(s): 3bf5c0a

Create app.py

Browse files

Files changed (1) hide show

app.py +84 -0

app.py ADDED Viewed

	@@ -0,0 +1,84 @@

+import streamlit as st
+import pandas as pd
+import base64
+import json
+from scrapegraphai.graphs import SmartScraperGraph
+import google.generativeai as genai
+# Used to securely store your API key
+from google.colab import userdata
+import nest_asyncio
+import os
+# Patch asyncio so event loops may be nested
+# (presumably required by the scraper when run under Streamlit -- confirm).
+nest_asyncio.apply()
+# The Gemini API key comes from the 'Gemini' environment variable; a missing
+# variable raises KeyError while the module is loading.
+GOOGLE_API_KEY = os.environ["Gemini"]
+# Configuration handed to SmartScraperGraph in get_data().
+graph_config = dict(
+    llm=dict(
+        api_key=GOOGLE_API_KEY,
+        model="google_genai/gemini-pro",
+    ),
+)
+def get_data(url):
+  """
+  Run a SmartScraperGraph over a page and return what it extracts.
+
+  Args:
+    url: Passed to SmartScraperGraph as its ``source``.
+
+  Returns:
+    The value produced by ``SmartScraperGraph.run()``. Callers in this file
+    index it as ``result['grants']``; the intended shape (not enforced here,
+    it depends on the model's answer) is:
+      {'grants': [{'grant_name': ..., 'funding_organisation': ...,
+                  'due_date': ..., 'eligible_countries': ...,
+                  'eligibility_conditions': ...}, ...]}
+  """
+  grant_prompt = "List me all grants or funds, the organisations funding them, the due date, eligible countries and eligibility conditions for applicants."
+  scraper = SmartScraperGraph(prompt=grant_prompt, source=url, config=graph_config)
+  return scraper.run()
+def convert_to_csv(data):
+  """
+  Serialise ``data['grants']`` to CSV.
+
+  Args:
+    data: Mapping whose 'grants' entry is accepted by ``pd.DataFrame``.
+
+  Returns:
+    The CSV text (header row included, index column omitted) encoded as
+    UTF-8 bytes.
+  """
+  grants_frame = pd.DataFrame(data['grants'])
+  csv_text = grants_frame.to_csv(index=False)
+  return csv_text.encode('utf-8')
+def convert_to_excel(data):
+  """
+  Serialise ``data['grants']`` to an in-memory Excel workbook.
+
+  Args:
+    data: Mapping whose 'grants' entry is accepted by ``pd.DataFrame``.
+
+  Returns:
+    The bytes of an .xlsx workbook with a single sheet named 'Grants'
+    (index column omitted), written with the 'xlsxwriter' engine.
+  """
+  # Imported here because the file's visible import list does not include
+  # io; without it the io.BytesIO() call below raised NameError.
+  import io
+  df = pd.DataFrame(data['grants'])
+  buffer = io.BytesIO()
+  # The workbook is only flushed into the buffer when the writer is closed,
+  # so the bytes are read after the with-block ends.
+  with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
+    df.to_excel(writer, sheet_name='Grants', index=False)
+  return buffer.getvalue()
+# --- Streamlit page: scrape a URL for grants and offer CSV/Excel downloads ---
+st.title("Quantilytix Grant Scraper")
+url = st.text_input("Enter URL")
+# st.button() is True only on the rerun triggered by the click, so the scrape
+# result is kept in st.session_state; otherwise changing the format selectbox
+# (which triggers another rerun) would discard the result and hide the links.
+if st.button("Get grants"):
+  if url:
+    try:
+      st.session_state["grant_result"] = get_data(url)
+      st.success("Data scraped successfully!")
+    except Exception as e:
+      # Drop any stale result so an old table is not shown next to the error.
+      if "grant_result" in st.session_state:
+        del st.session_state["grant_result"]
+      st.error(f"Error scraping data: {e}")
+  else:
+    st.warning("Please enter a URL.")
+# Rendered on every rerun while a result is stored, independent of the button.
+if "grant_result" in st.session_state:
+  result = st.session_state["grant_result"]
+  try:
+    selected_format = st.selectbox("Select Download Format", ("CSV", "Excel"))
+    if selected_format == "CSV":
+      csv_data = convert_to_csv(result)
+      b64 = base64.b64encode(csv_data).decode()
+      # CSV is served as text/csv (the Excel MIME type was used here before).
+      download_link = f"<a href='data:text/csv;base64,{b64}' download='grants.csv'>Download CSV</a>"
+      st.markdown(download_link, unsafe_allow_html=True)
+    elif selected_format == "Excel":
+      excel_data = convert_to_excel(result)
+      b64 = base64.b64encode(excel_data).decode()
+      download_link = f"<a href='data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}' download='grants.xlsx'>Download Excel</a>"
+      st.markdown(download_link, unsafe_allow_html=True)
+    st.dataframe(result['grants'])
+  except Exception as e:
+    # Conversion/rendering problems (e.g. a result without a 'grants' entry)
+    # are reported separately from scraping failures.
+    st.error(f"Error preparing download: {e}")