rairo committed on
Commit
2946558
·
verified ·
1 Parent(s): 1a34d0c

Create test.py

Browse files
Files changed (1) hide show
  1. test.py +96 -0
test.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import base64
4
+ import json
5
+ from scrapegraphai.graphs import SmartScraperGraph
6
+ import nest_asyncio
7
+ import os
8
+ import subprocess
9
+ import io
10
+
11
# Ensure Playwright has the browsers scrapegraphai needs before any scrape
# runs.  Streamlit re-executes this whole script on every widget interaction,
# so the (slow) install is guarded to run only once per user session.
if not st.session_state.get("_playwright_installed", False):
    subprocess.run(["playwright", "install"], check=False)
    #subprocess.run(["playwright", "install-deps"])
    st.session_state["_playwright_installed"] = True

# Streamlit already runs inside an asyncio event loop; nest_asyncio patches
# asyncio so scrapegraphai's internal loop usage can nest within it.
nest_asyncio.apply()

# Fail fast (KeyError) if the key is not configured in the environment —
# there is no point rendering the UI without LLM credentials.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']

# LLM configuration handed to every SmartScraperGraph instance.
graph_config = {
    "llm": {
        "api_key": GOOGLE_API_KEY,
        "model": "google_genai/gemini-pro",
    },
}
26
+
27
def get_data(url):
    """
    Scrape grant/funding information from *url* using scrapegraphai.

    Args:
        url: The URL to scrape.

    Returns:
        A dictionary containing the extracted data in the following format:
        {'grants': [{'grant_name': ..., 'funding_organisation': ...,
                     'due_date': ..., 'eligible_countries': ...,
                     'eligibility_conditions': ...}, ...]}
    """
    # The prompt spells out every field the downstream table/export expects.
    scrape_prompt = (
        "List me all grants or funds,short summary of grant description,"
        "the organisations funding them, The value of the grant as an "
        "integer, the due date, eligible countries and eligibility "
        "criteria for applicants."
    )
    scraper = SmartScraperGraph(
        prompt=scrape_prompt,
        source=url,
        config=graph_config,
    )
    return scraper.run()
49
+
50
def convert_to_csv(data):
    """Render data['grants'] as a CSV table (no index column), UTF-8 encoded bytes."""
    return pd.DataFrame(data['grants']).to_csv(index=False).encode('utf-8')
53
+
54
def convert_to_excel(data):
    """Render data['grants'] as an .xlsx workbook (single sheet 'Grants'); return raw bytes."""
    output = io.BytesIO()
    writer = pd.ExcelWriter(output, engine='xlsxwriter')
    try:
        frame = pd.DataFrame(data['grants'])
        frame.to_excel(writer, sheet_name='Grants', index=False)
    finally:
        # Equivalent to the context-manager exit: flush the workbook into the buffer.
        writer.close()
    return output.getvalue()
60
+
61
# --- Streamlit UI ---------------------------------------------------------
# NOTE(review): Streamlit re-runs this whole script on every interaction, so
# the scraped result is cached in st.session_state to survive reruns (e.g.
# when the user changes the download-format selectbox).

st.title("Quantilytix Grant Scraper")

url = st.text_input("Enter URL")

# Initialise the session slot exactly once per session.
if "scraped_data" not in st.session_state:
    st.session_state.scraped_data = None

if st.button("Get grants"):
    if url:
        try:
            with st.spinner("Retrieving Grants, Please Wait...."):
                result = get_data(url)
                st.session_state.scraped_data = result  # Store result in session state
            st.success("Data scraped successfully!")
        except Exception as e:
            # Broad catch is deliberate here: any scraper/LLM failure is
            # surfaced to the user rather than crashing the app.
            st.error(f"Error scraping data: {e}")
    else:
        st.warning("Please enter a URL.")

# Render the download controls only once a scrape has succeeded.
if st.session_state.scraped_data:
    selected_format = st.selectbox("Select Download Format", ("CSV", "Excel"))

    result = st.session_state.scraped_data  # Access the saved result

    # Downloads are delivered as base64 data-URI anchor tags rendered via
    # st.markdown with unsafe_allow_html.
    if selected_format == "CSV":
        csv_data = convert_to_csv(result)
        b64 = base64.b64encode(csv_data).decode()
        download_link = f"<a href='data:application/vnd.ms-excel;base64,{b64}' download='grants.csv'>Download CSV</a>"
        st.markdown(download_link, unsafe_allow_html=True)
    elif selected_format == "Excel":
        excel_data = convert_to_excel(result)
        b64 = base64.b64encode(excel_data).decode()
        download_link = f"<a href='data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}' download='grants.xlsx'>Download Excel</a>"
        st.markdown(download_link, unsafe_allow_html=True)

    # Show the scraped grants as an interactive table.
    # assumes result['grants'] is a list of flat dicts — TODO confirm against
    # what SmartScraperGraph actually returns for this prompt.
    st.dataframe(result['grants'])