rairo committed on
Commit
4b36e78
·
verified ·
1 Parent(s): 689c1b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -33
app.py CHANGED
@@ -7,6 +7,11 @@ import nest_asyncio
7
  import os
8
  import subprocess
9
  import io
 
 
 
 
 
10
 
11
  # Ensure Playwright installs required browsers and dependencies
12
  subprocess.run(["playwright", "install"])
@@ -14,7 +19,6 @@ subprocess.run(["playwright", "install"])
14
 
15
  nest_asyncio.apply()
16
 
17
-
18
  GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
19
 
20
  graph_config = {
@@ -25,39 +29,68 @@ graph_config = {
25
  }
26
 
27
  def get_data(url):
28
- """
29
- Fetches data from the given URL using scrapegraphai.
30
-
31
- Args:
32
- url: The URL to scrape.
33
-
34
- Returns:
35
- A dictionary containing the extracted data in the following format:
36
- {'grants': [{'grant_name': ..., 'funding_organisation': ...,
37
- 'due_date': ..., 'eligible_countries': ...,
38
- 'eligibility_conditions': ...}, ...]}
39
- """
40
-
41
- smart_scraper_graph = SmartScraperGraph(
42
- prompt="List me all grants or funds,short summary of grant description,the organisations funding them, The value of the grant as an integer, the due date, eligible countries and eligibility criteria for applicants.",
43
- source=url,
44
- config=graph_config
45
- )
46
-
47
- result = smart_scraper_graph.run()
48
- return result
49
 
50
  def convert_to_csv(data):
51
- df = pd.DataFrame(data['grants'])
52
- return df.to_csv(index=False).encode('utf-8')
53
 
54
  def convert_to_excel(data):
55
- df = pd.DataFrame(data['grants'])
56
- buffer = io.BytesIO()
57
- with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
58
- df.to_excel(writer, sheet_name='Grants', index=False)
59
- return buffer.getvalue()
60
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  def main():
63
  st.sidebar.title("Quantilytix Grant Scraper")
@@ -70,6 +103,9 @@ def main():
70
  if "chat_history" not in st.session_state:
71
  st.session_state.chat_history = []
72
 
 
 
 
73
  if st.sidebar.button("Get grants"):
74
  if url:
75
  try:
@@ -101,6 +137,7 @@ def main():
101
  st.dataframe(result['grants'])
102
 
103
  if st.sidebar.button("Load as Knowledge Base"):
 
104
  st.session_state.chat_interface_active = True
105
 
106
  if "chat_interface_active" in st.session_state and st.session_state.chat_interface_active:
@@ -108,9 +145,11 @@ def main():
108
 
109
  query = st.text_input("Ask a question about the grants:", key="chat_input")
110
  if query:
111
- # Placeholder for response generation logic
112
- response = f"Response to '{query}' based on the knowledge base." # Simulated response
113
- st.session_state.chat_history.append({"query": query, "response": response})
 
 
114
 
115
  # Display chat history
116
  for chat in st.session_state.chat_history:
 
7
  import os
8
  import subprocess
9
  import io
10
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
11
+ from langchain.vectorstores import FAISS
12
+ from langchain.text_splitter import CharacterTextSplitter
13
+ from langchain.chains import ConversationalRetrievalChain
14
+ from langchain.memory import ConversationBufferMemory
15
 
16
  # Ensure Playwright installs required browsers and dependencies
17
  subprocess.run(["playwright", "install"])
 
19
 
20
  nest_asyncio.apply()
21
 
 
22
  GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
23
 
24
  graph_config = {
 
29
  }
30
 
31
def get_data(url):
    """Scrape grant listings from a web page using scrapegraphai.

    Args:
        url: The URL of the page to scrape.

    Returns:
        A dictionary of extracted data in the following shape:
        {'grants': [{'grant_name': ..., 'funding_organisation': ...,
                     'due_date': ..., 'eligible_countries': ...,
                     'eligibility_conditions': ...}, ...]}
    """
    # graph_config is the module-level scrapegraphai configuration.
    scraper = SmartScraperGraph(
        prompt="List me all grants or funds,short summary of grant description,the organisations funding them, The value of the grant as an integer, the due date, eligible countries and eligibility criteria for applicants.",
        source=url,
        config=graph_config,
    )
    return scraper.run()
 
52
 
53
def convert_to_csv(data):
    """Serialise the scraped grants to CSV and return UTF-8 encoded bytes."""
    frame = pd.DataFrame(data['grants'])
    csv_text = frame.to_csv(index=False)
    return csv_text.encode('utf-8')
56
 
57
def convert_to_excel(data):
    """Serialise the scraped grants to an in-memory XLSX workbook.

    Returns the workbook as raw bytes, suitable for a download button.
    """
    frame = pd.DataFrame(data['grants'])
    stream = io.BytesIO()
    # The context manager finalises the workbook before the buffer is read.
    with pd.ExcelWriter(stream, engine='xlsxwriter') as writer:
        frame.to_excel(writer, sheet_name='Grants', index=False)
    return stream.getvalue()
63
+
64
def create_knowledge_base(data):
    """Build a conversational retrieval chain over the scraped grants.

    Args:
        data: Scraper output of the form {'grants': [...]}, where each
            grant dict carries the keys formatted below.

    Returns:
        A ConversationalRetrievalChain backed by a FAISS index of the
        grant records, with buffered chat memory for follow-up questions.
    """
    # One plain-text document per grant record.
    grant_texts = [
        f"Grant Name: {grant['grant_name']}\nFunding Organisation: {grant['funding_organisation']}\nDue Date: {grant['due_date']}\nEligible Countries: {grant['eligible_countries']}\nEligibility Conditions: {grant['eligibility_conditions']}"
        for grant in data['grants']
    ]

    # Chunk the documents, embed them, and index the chunks in FAISS.
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.create_documents(grant_texts)
    embedder = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
    index = FAISS.from_documents(chunks, embedder)

    # Gemini-backed chain; temperature 0 keeps answers deterministic.
    chat_model = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY, temperature=0)
    chat_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(chat_model, index.as_retriever(), memory=chat_memory)
94
 
95
  def main():
96
  st.sidebar.title("Quantilytix Grant Scraper")
 
103
  if "chat_history" not in st.session_state:
104
  st.session_state.chat_history = []
105
 
106
+ if "qa_chain" not in st.session_state:
107
+ st.session_state.qa_chain = None
108
+
109
  if st.sidebar.button("Get grants"):
110
  if url:
111
  try:
 
137
  st.dataframe(result['grants'])
138
 
139
  if st.sidebar.button("Load as Knowledge Base"):
140
+ st.session_state.qa_chain = create_knowledge_base(result)
141
  st.session_state.chat_interface_active = True
142
 
143
  if "chat_interface_active" in st.session_state and st.session_state.chat_interface_active:
 
145
 
146
  query = st.text_input("Ask a question about the grants:", key="chat_input")
147
  if query:
148
+ if st.session_state.qa_chain:
149
+ response = st.session_state.qa_chain({"question": query})
150
+ st.session_state.chat_history.append({"query": query, "response": response['answer']})
151
+ else:
152
+ st.error("Knowledge base not loaded. Please load the knowledge base first.")
153
 
154
  # Display chat history
155
  for chat in st.session_state.chat_history: