Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,11 +7,6 @@ import nest_asyncio
|
|
| 7 |
import os
|
| 8 |
import subprocess
|
| 9 |
import io
|
| 10 |
-
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
|
| 11 |
-
from langchain.vectorstores import FAISS
|
| 12 |
-
from langchain.text_splitter import CharacterTextSplitter
|
| 13 |
-
from langchain.chains import ConversationalRetrievalChain
|
| 14 |
-
|
| 15 |
|
| 16 |
# Ensure Playwright installs required browsers and dependencies
|
| 17 |
subprocess.run(["playwright", "install"])
|
|
@@ -29,14 +24,6 @@ graph_config = {
|
|
| 29 |
},
|
| 30 |
}
|
| 31 |
|
| 32 |
-
llm = ChatGoogleGenerativeAI(
|
| 33 |
-
model="gemini-1.5-flash",
|
| 34 |
-
temperature=0,
|
| 35 |
-
max_tokens=None,
|
| 36 |
-
timeout=None,
|
| 37 |
-
max_retries=2
|
| 38 |
-
)
|
| 39 |
-
|
| 40 |
def get_data(url):
|
| 41 |
"""
|
| 42 |
Fetches data from the given URL using scrapegraphai.
|
|
@@ -72,60 +59,63 @@ def convert_to_excel(data):
|
|
| 72 |
return buffer.getvalue()
|
| 73 |
|
| 74 |
|
| 75 |
-
# Main function
|
| 76 |
def main():
|
| 77 |
st.sidebar.title("Quantilytix Grant Scraper")
|
| 78 |
|
|
|
|
|
|
|
| 79 |
if "scraped_data" not in st.session_state:
|
| 80 |
st.session_state.scraped_data = None
|
| 81 |
|
| 82 |
if "chat_history" not in st.session_state:
|
| 83 |
st.session_state.chat_history = []
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
| 93 |
-
if st.session_state.scraped_data
|
| 94 |
-
st.
|
| 95 |
|
| 96 |
-
|
| 97 |
-
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
|
| 98 |
-
embeddings = GoogleGenerativeAIEmbeddings()
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
-
|
| 106 |
-
qa_chain = ConversationalRetrievalChain.from_llm(
|
| 107 |
-
llm,
|
| 108 |
-
retriever=db.as_retriever()
|
| 109 |
-
)
|
| 110 |
|
| 111 |
if st.sidebar.button("Load as Knowledge Base"):
|
| 112 |
st.session_state.chat_interface_active = True
|
| 113 |
|
| 114 |
if "chat_interface_active" in st.session_state and st.session_state.chat_interface_active:
|
| 115 |
-
st.write("Chat Interface Loaded. Start asking questions about the
|
| 116 |
|
| 117 |
-
query = st.text_input("Ask a question about the
|
| 118 |
if query:
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
except Exception as e:
|
| 128 |
-
st.error(f"Error generating response: {e}")
|
| 129 |
|
| 130 |
if __name__ == "__main__":
|
| 131 |
-
main()
|
|
|
|
| 7 |
import os
|
| 8 |
import subprocess
|
| 9 |
import io
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Ensure Playwright installs required browsers and dependencies
|
| 12 |
subprocess.run(["playwright", "install"])
|
|
|
|
| 24 |
},
|
| 25 |
}
|
| 26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
def get_data(url):
|
| 28 |
"""
|
| 29 |
Fetches data from the given URL using scrapegraphai.
|
|
|
|
| 59 |
return buffer.getvalue()
|
| 60 |
|
| 61 |
|
|
|
|
| 62 |
def main():
|
| 63 |
st.sidebar.title("Quantilytix Grant Scraper")
|
| 64 |
|
| 65 |
+
url = st.sidebar.text_input("Enter URL")
|
| 66 |
+
|
| 67 |
if "scraped_data" not in st.session_state:
|
| 68 |
st.session_state.scraped_data = None
|
| 69 |
|
| 70 |
if "chat_history" not in st.session_state:
|
| 71 |
st.session_state.chat_history = []
|
| 72 |
|
| 73 |
+
if st.sidebar.button("Get grants"):
|
| 74 |
+
if url:
|
| 75 |
+
try:
|
| 76 |
+
with st.spinner("Retrieving Grants, Please Wait...."):
|
| 77 |
+
result = get_data(url)
|
| 78 |
+
st.session_state.scraped_data = result # Store result in session state
|
| 79 |
+
st.success("Data scraped successfully!")
|
| 80 |
+
except Exception as e:
|
| 81 |
+
st.error(f"Error scraping data: {e}")
|
| 82 |
+
else:
|
| 83 |
+
st.warning("Please enter a URL.")
|
| 84 |
|
| 85 |
+
if st.session_state.scraped_data:
|
| 86 |
+
selected_format = st.sidebar.selectbox("Select Download Format", ("CSV", "Excel"))
|
| 87 |
|
| 88 |
+
result = st.session_state.scraped_data # Access the saved result
|
|
|
|
|
|
|
| 89 |
|
| 90 |
+
if selected_format == "CSV":
|
| 91 |
+
csv_data = convert_to_csv(result)
|
| 92 |
+
b64 = base64.b64encode(csv_data).decode()
|
| 93 |
+
download_link = f"<a href='data:application/vnd.ms-excel;base64,{b64}' download='grants.csv'>Download CSV</a>"
|
| 94 |
+
st.sidebar.markdown(download_link, unsafe_allow_html=True)
|
| 95 |
+
elif selected_format == "Excel":
|
| 96 |
+
excel_data = convert_to_excel(result)
|
| 97 |
+
b64 = base64.b64encode(excel_data).decode()
|
| 98 |
+
download_link = f"<a href='data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}' download='grants.xlsx'>Download Excel</a>"
|
| 99 |
+
st.sidebar.markdown(download_link, unsafe_allow_html=True)
|
| 100 |
|
| 101 |
+
st.dataframe(result['grants'])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
if st.sidebar.button("Load as Knowledge Base"):
|
| 104 |
st.session_state.chat_interface_active = True
|
| 105 |
|
| 106 |
if "chat_interface_active" in st.session_state and st.session_state.chat_interface_active:
|
| 107 |
+
st.write("Chat Interface Loaded. Start asking questions about the grants!")
|
| 108 |
|
| 109 |
+
query = st.text_input("Ask a question about the grants:", key="chat_input")
|
| 110 |
if query:
|
| 111 |
+
# Placeholder for response generation logic
|
| 112 |
+
response = f"Response to '{query}' based on the knowledge base." # Simulated response
|
| 113 |
+
st.session_state.chat_history.append({"query": query, "response": response})
|
| 114 |
+
|
| 115 |
+
# Display chat history
|
| 116 |
+
for chat in st.session_state.chat_history:
|
| 117 |
+
st.write(f"**You:** {chat['query']}")
|
| 118 |
+
st.write(f"**Grants Bot:** {chat['response']}")
|
|
|
|
|
|
|
| 119 |
|
| 120 |
if __name__ == "__main__":
|
| 121 |
+
main()
|