Spaces:

rairo
/

QuantGrantsList

Sleeping

App Files Files Community

rairo committed on Jan 22, 2025

Commit

4b36e78

verified ·

1 Parent(s): 689c1b5

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -33

app.py CHANGED Viewed

@@ -7,6 +7,11 @@ import nest_asyncio
 import os
 import subprocess
 import io
 # Ensure Playwright installs required browsers and dependencies
 subprocess.run(["playwright", "install"])
@@ -14,7 +19,6 @@ subprocess.run(["playwright", "install"])
 nest_asyncio.apply()
 GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
 graph_config = {
@@ -25,39 +29,68 @@ graph_config = {
 }
 def get_data(url):
-  """
-  Fetches data from the given URL using scrapegraphai.
-  Args:
-    url: The URL to scrape.
-  Returns:
-    A dictionary containing the extracted data in the following format:
-      {'grants': [{'grant_name': ..., 'funding_organisation': ...,
-                  'due_date': ..., 'eligible_countries': ...,
-                  'eligibility_conditions': ...}, ...]}
-  """
-  smart_scraper_graph = SmartScraperGraph(
-      prompt="List me all grants or funds,short summary of grant description,the organisations funding them, The value of the grant as an integer, the due date, eligible countries and eligibility criteria for applicants.",
-      source=url,
-      config=graph_config
-  )
-  result = smart_scraper_graph.run()
-  return result
 def convert_to_csv(data):
-  df = pd.DataFrame(data['grants'])
-  return df.to_csv(index=False).encode('utf-8')
 def convert_to_excel(data):
-  df = pd.DataFrame(data['grants'])
-  buffer = io.BytesIO()
-  with pd.ExcelWriter(buffer, engine='xlsxwriter') as writer:
-    df.to_excel(writer, sheet_name='Grants', index=False)
-  return buffer.getvalue()
 def main():
     st.sidebar.title("Quantilytix Grant Scraper")
@@ -70,6 +103,9 @@ def main():
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = []
     if st.sidebar.button("Get grants"):
         if url:
             try:
@@ -101,6 +137,7 @@ def main():
         st.dataframe(result['grants'])
         if st.sidebar.button("Load as Knowledge Base"):
             st.session_state.chat_interface_active = True
     if "chat_interface_active" in st.session_state and st.session_state.chat_interface_active:
@@ -108,9 +145,11 @@ def main():
         query = st.text_input("Ask a question about the grants:", key="chat_input")
         if query:
-            # Placeholder for response generation logic
-            response = f"Response to '{query}' based on the knowledge base."  # Simulated response
-            st.session_state.chat_history.append({"query": query, "response": response})
         # Display chat history
         for chat in st.session_state.chat_history:

 import os
 import subprocess
 import io
+from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.chains import ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
 # Ensure Playwright installs required browsers and dependencies
 subprocess.run(["playwright", "install"])
 nest_asyncio.apply()
 GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
 graph_config = {
 }
 def get_data(url):
+    """
+    Run a scrapegraphai SmartScraperGraph over one page and hand back its output.
+    Args:
+        url: Address of the page to scrape; passed to the graph as its source.
+    Returns:
+        Whatever SmartScraperGraph.run() yields for the grant-listing prompt.
+        Callers in this file read result['grants'], so the result is presumably
+        a dictionary holding a list of grant records; the exact keys of each
+        record depend on the model output and are not fixed here.
+    """
+    # The prompt text drives which fields the model extracts for every grant.
+    grant_prompt = "List me all grants or funds,short summary of grant description,the organisations funding them, The value of the grant as an integer, the due date, eligible countries and eligibility criteria for applicants."
+    scraper = SmartScraperGraph(prompt=grant_prompt, source=url, config=graph_config)
+    return scraper.run()
 def convert_to_csv(data):
+    """
+    Serialise the scraped grants as CSV bytes.
+    Args:
+        data: Mapping whose 'grants' entry is handed to pandas.DataFrame.
+    Returns:
+        The UTF-8 encoded CSV text, written without the index column.
+    """
+    grants_frame = pd.DataFrame(data['grants'])
+    csv_text = grants_frame.to_csv(index=False)
+    return csv_text.encode('utf-8')
 def convert_to_excel(data):
+    """
+    Serialise the scraped grants as an in-memory Excel workbook.
+    Args:
+        data: Mapping whose 'grants' entry is handed to pandas.DataFrame.
+    Returns:
+        The bytes of a workbook with a single 'Grants' sheet, written
+        without the index column using the xlsxwriter engine.
+    """
+    grants_frame = pd.DataFrame(data['grants'])
+    workbook_bytes = io.BytesIO()
+    excel_writer = pd.ExcelWriter(workbook_bytes, engine='xlsxwriter')
+    with excel_writer:
+        grants_frame.to_excel(excel_writer, sheet_name='Grants', index=False)
+    # Read the buffer only after the writer has closed and flushed the workbook.
+    return workbook_bytes.getvalue()
+def create_knowledge_base(data):
+    """
+    Creates a knowledge base from the scraped data using FAISS and GoogleGenerativeAIEmbeddings.
+    Args:
+        data: The scraped data in dictionary format; its 'grants' entry is the
+            list of grant records to index.
+    Returns:
+        A ConversationalRetrievalChain object for querying the knowledge base.
+    Raises:
+        ValueError: If the data holds no grants, since there is nothing to index.
+    """
+    # Fields the indexed text has always carried, in their established order.
+    known_fields = [
+        ('grant_name', 'Grant Name'),
+        ('funding_organisation', 'Funding Organisation'),
+        ('due_date', 'Due Date'),
+        ('eligible_countries', 'Eligible Countries'),
+        ('eligibility_conditions', 'Eligibility Conditions'),
+    ]
+    known_keys = {key for key, _ in known_fields}
+    grants = (data or {}).get('grants') or []
+    if not grants:
+        # Fail with a clear message instead of an opaque error from the vector store.
+        raise ValueError("No grants available to build a knowledge base.")
+    # Convert the data into a list of strings
+    documents = []
+    for grant in grants:
+        if not isinstance(grant, dict):
+            # The records come from model output; keep unexpected shapes searchable as text.
+            documents.append(str(grant))
+            continue
+        # Missing fields get a placeholder rather than raising KeyError.
+        lines = [f"{label}: {grant.get(key, 'Not specified')}" for key, label in known_fields]
+        # Keep any extra fields the scraper returned (e.g. description, value).
+        lines.extend(
+            f"{str(key).replace('_', ' ').title()}: {value}"
+            for key, value in grant.items()
+            if key not in known_keys
+        )
+        documents.append("\n".join(lines))
+    # Split the documents into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+    texts = text_splitter.create_documents(documents)
+    # Create embeddings and store them in FAISS
+    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
+    vectorstore = FAISS.from_documents(texts, embeddings)
+    # Create a conversational retrieval chain
+    llm = ChatGoogleGenerativeAI(model="gemini-pro", google_api_key=GOOGLE_API_KEY, temperature=0)
+    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+    qa_chain = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), memory=memory)
+    return qa_chain
 def main():
     st.sidebar.title("Quantilytix Grant Scraper")
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = []
+    if "qa_chain" not in st.session_state:
+        st.session_state.qa_chain = None
     if st.sidebar.button("Get grants"):
         if url:
             try:
         st.dataframe(result['grants'])
         if st.sidebar.button("Load as Knowledge Base"):
+            st.session_state.qa_chain = create_knowledge_base(result)
             st.session_state.chat_interface_active = True
     if "chat_interface_active" in st.session_state and st.session_state.chat_interface_active:
         query = st.text_input("Ask a question about the grants:", key="chat_input")
         if query:
+            if st.session_state.qa_chain:
+                response = st.session_state.qa_chain({"question": query})
+                st.session_state.chat_history.append({"query": query, "response": response['answer']})
+            else:
+                st.error("Knowledge base not loaded. Please load the knowledge base first.")
         # Display chat history
         for chat in st.session_state.chat_history: