Spaces:
Paused
Paused
itsOwen committed on
Commit ·
a8d4530
1
Parent(s): bc8669b
few fixes
Browse files
- README.md +5 -0
- app/__pycache__/streamlit_web_scraper_chat.cpython-312.pyc +0 -0
- app/__pycache__/ui_components.cpython-312.pyc +0 -0
- app/__pycache__/utils.cpython-312.pyc +0 -0
- app/utils.py +25 -1
- main.py +36 -25
- requirements.txt +3 -1
- src/__pycache__/__init__.cpython-312.pyc +0 -0
- src/__pycache__/models.cpython-312.pyc +0 -0
- src/__pycache__/web_extractor.cpython-312.pyc +0 -0
- src/scrapers/__pycache__/__init__.cpython-312.pyc +0 -0
- src/scrapers/__pycache__/base_scraper.cpython-312.pyc +0 -0
- src/scrapers/__pycache__/html_scraper.cpython-312.pyc +0 -0
- src/scrapers/__pycache__/json_scraper.cpython-312.pyc +0 -0
- src/scrapers/__pycache__/playwright_scraper.cpython-312.pyc +0 -0
- src/utils/__pycache__/__init__.cpython-312.pyc +0 -0
- src/utils/__pycache__/markdown_formatter.cpython-312.pyc +0 -0
- src/utils/__pycache__/proxy_manager.cpython-312.pyc +0 -0
README.md
CHANGED
|
@@ -55,6 +55,11 @@ Check out our [YouTube video](https://www.youtube.com/watch?v=iATSd5Ijl4M) for a
|
|
| 55 |
pip install -r requirements.txt
|
| 56 |
```
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
## 🚀 Usage
|
| 59 |
|
| 60 |
1. Fire up the Streamlit app:
|
|
|
|
| 55 |
pip install -r requirements.txt
|
| 56 |
```
|
| 57 |
|
| 58 |
+
4. Install the playwright:
|
| 59 |
+
```bash
|
| 60 |
+
playwright install
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
## 🚀 Usage
|
| 64 |
|
| 65 |
1. Fire up the Streamlit app:
|
app/__pycache__/streamlit_web_scraper_chat.cpython-312.pyc
CHANGED
|
Binary files a/app/__pycache__/streamlit_web_scraper_chat.cpython-312.pyc and b/app/__pycache__/streamlit_web_scraper_chat.cpython-312.pyc differ
|
|
|
app/__pycache__/ui_components.cpython-312.pyc
CHANGED
|
Binary files a/app/__pycache__/ui_components.cpython-312.pyc and b/app/__pycache__/ui_components.cpython-312.pyc differ
|
|
|
app/__pycache__/utils.cpython-312.pyc
CHANGED
|
Binary files a/app/__pycache__/utils.cpython-312.pyc and b/app/__pycache__/utils.cpython-312.pyc differ
|
|
|
app/utils.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
import random
|
| 2 |
|
| 3 |
def get_loading_message():
|
|
@@ -13,4 +15,26 @@ def get_loading_message():
|
|
| 13 |
"Scraping the net, one byte at a time...",
|
| 14 |
"Crashing through the data barriers, Johnny-style..."
|
| 15 |
]
|
| 16 |
-
return random.choice(messages)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import time
|
| 3 |
import random
|
| 4 |
|
| 5 |
def get_loading_message():
|
|
|
|
| 15 |
"Scraping the net, one byte at a time...",
|
| 16 |
"Crashing through the data barriers, Johnny-style..."
|
| 17 |
]
|
| 18 |
+
return random.choice(messages)
|
| 19 |
+
|
| 20 |
+
def loading_animation(process_func, *args, **kwargs):
|
| 21 |
+
loading_placeholder = st.empty()
|
| 22 |
+
result = None
|
| 23 |
+
start_time = time.time()
|
| 24 |
+
while result is None:
|
| 25 |
+
elapsed_time = time.time() - start_time
|
| 26 |
+
if elapsed_time > 30: # Timeout after 30 seconds
|
| 27 |
+
loading_placeholder.error("Request timed out. Please try again.")
|
| 28 |
+
return None
|
| 29 |
+
|
| 30 |
+
loading_message = get_loading_message()
|
| 31 |
+
loading_placeholder.text(f"{loading_message}\n\nIn progress...")
|
| 32 |
+
|
| 33 |
+
try:
|
| 34 |
+
result = process_func(*args, **kwargs)
|
| 35 |
+
except Exception as e:
|
| 36 |
+
loading_placeholder.error(f"An error occurred: {str(e)}. Retrying...")
|
| 37 |
+
time.sleep(1)
|
| 38 |
+
|
| 39 |
+
loading_placeholder.empty()
|
| 40 |
+
return result
|
main.py
CHANGED
|
@@ -1,8 +1,20 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import time
|
| 3 |
from app.streamlit_web_scraper_chat import StreamlitWebScraperChat
|
| 4 |
from app.ui_components import display_info_icons, display_message
|
| 5 |
-
from app.utils import get_loading_message
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def main():
|
| 8 |
st.set_page_config(page_title="CyberScraper 2077", page_icon="🌐", layout="wide")
|
|
@@ -32,7 +44,10 @@ def main():
|
|
| 32 |
|
| 33 |
if "messages" not in st.session_state:
|
| 34 |
st.session_state.messages = []
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
| 36 |
chat_container = st.container()
|
| 37 |
|
| 38 |
with chat_container:
|
|
@@ -40,30 +55,26 @@ def main():
|
|
| 40 |
with st.chat_message(message["role"]):
|
| 41 |
display_message(message)
|
| 42 |
|
| 43 |
-
if prompt := st.chat_input("Enter the URL to scrape or ask a question regarding the data"):
|
| 44 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
st.markdown(prompt)
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
display_message({"role": "assistant", "content": full_response})
|
| 65 |
-
|
| 66 |
-
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
| 67 |
|
| 68 |
st.markdown(
|
| 69 |
"""
|
|
|
|
| 1 |
import streamlit as st
|
|
|
|
| 2 |
from app.streamlit_web_scraper_chat import StreamlitWebScraperChat
|
| 3 |
from app.ui_components import display_info_icons, display_message
|
| 4 |
+
from app.utils import loading_animation, get_loading_message
|
| 5 |
+
|
| 6 |
+
def safe_process_message(web_scraper_chat, message):
|
| 7 |
+
if message is None or message.strip() == "":
|
| 8 |
+
return "I'm sorry, but I didn't receive any input. Could you please try again?"
|
| 9 |
+
try:
|
| 10 |
+
return web_scraper_chat.process_message(message)
|
| 11 |
+
except AttributeError as e:
|
| 12 |
+
if "'NoneType' object has no attribute 'lower'" in str(e):
|
| 13 |
+
return "I encountered an issue while processing your request. It seems like I received an unexpected empty value. Could you please try rephrasing your input?"
|
| 14 |
+
else:
|
| 15 |
+
raise e
|
| 16 |
+
except Exception as e:
|
| 17 |
+
return f"An unexpected error occurred: {str(e)}. Please try again or contact support if the issue persists."
|
| 18 |
|
| 19 |
def main():
|
| 20 |
st.set_page_config(page_title="CyberScraper 2077", page_icon="🌐", layout="wide")
|
|
|
|
| 44 |
|
| 45 |
if "messages" not in st.session_state:
|
| 46 |
st.session_state.messages = []
|
| 47 |
+
|
| 48 |
+
if "processing" not in st.session_state:
|
| 49 |
+
st.session_state.processing = False
|
| 50 |
+
|
| 51 |
chat_container = st.container()
|
| 52 |
|
| 53 |
with chat_container:
|
|
|
|
| 55 |
with st.chat_message(message["role"]):
|
| 56 |
display_message(message)
|
| 57 |
|
| 58 |
+
if prompt := st.chat_input("Enter the URL to scrape or ask a question regarding the data", key="user_input"):
|
| 59 |
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 60 |
+
st.session_state.processing = True
|
| 61 |
+
st.rerun()
|
|
|
|
| 62 |
|
| 63 |
+
if st.session_state.processing:
|
| 64 |
+
with st.chat_message("assistant"):
|
| 65 |
+
try:
|
| 66 |
+
full_response = loading_animation(
|
| 67 |
+
safe_process_message,
|
| 68 |
+
st.session_state.web_scraper_chat,
|
| 69 |
+
st.session_state.messages[-1]["content"]
|
| 70 |
+
)
|
| 71 |
+
if full_response is not None:
|
| 72 |
+
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
| 73 |
+
except Exception as e:
|
| 74 |
+
st.error(f"An unexpected error occurred: {str(e)}")
|
| 75 |
+
finally:
|
| 76 |
+
st.session_state.processing = False
|
| 77 |
+
st.rerun()
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
st.markdown(
|
| 80 |
"""
|
requirements.txt
CHANGED
|
@@ -4,6 +4,7 @@ pandas
|
|
| 4 |
tiktoken
|
| 5 |
langchain
|
| 6 |
langchain-community
|
|
|
|
| 7 |
openai
|
| 8 |
playwright
|
| 9 |
openpyxl
|
|
@@ -11,4 +12,5 @@ xlsxwriter
|
|
| 11 |
beautifulsoup4
|
| 12 |
markdown
|
| 13 |
aiohttp
|
| 14 |
-
python-dotenv
|
|
|
|
|
|
| 4 |
tiktoken
|
| 5 |
langchain
|
| 6 |
langchain-community
|
| 7 |
+
langchain_openai
|
| 8 |
openai
|
| 9 |
playwright
|
| 10 |
openpyxl
|
|
|
|
| 12 |
beautifulsoup4
|
| 13 |
markdown
|
| 14 |
aiohttp
|
| 15 |
+
python-dotenv
|
| 16 |
+
watchdog
|
src/__pycache__/__init__.cpython-312.pyc
CHANGED
|
Binary files a/src/__pycache__/__init__.cpython-312.pyc and b/src/__pycache__/__init__.cpython-312.pyc differ
|
|
|
src/__pycache__/models.cpython-312.pyc
CHANGED
|
Binary files a/src/__pycache__/models.cpython-312.pyc and b/src/__pycache__/models.cpython-312.pyc differ
|
|
|
src/__pycache__/web_extractor.cpython-312.pyc
CHANGED
|
Binary files a/src/__pycache__/web_extractor.cpython-312.pyc and b/src/__pycache__/web_extractor.cpython-312.pyc differ
|
|
|
src/scrapers/__pycache__/__init__.cpython-312.pyc
CHANGED
|
Binary files a/src/scrapers/__pycache__/__init__.cpython-312.pyc and b/src/scrapers/__pycache__/__init__.cpython-312.pyc differ
|
|
|
src/scrapers/__pycache__/base_scraper.cpython-312.pyc
CHANGED
|
Binary files a/src/scrapers/__pycache__/base_scraper.cpython-312.pyc and b/src/scrapers/__pycache__/base_scraper.cpython-312.pyc differ
|
|
|
src/scrapers/__pycache__/html_scraper.cpython-312.pyc
CHANGED
|
Binary files a/src/scrapers/__pycache__/html_scraper.cpython-312.pyc and b/src/scrapers/__pycache__/html_scraper.cpython-312.pyc differ
|
|
|
src/scrapers/__pycache__/json_scraper.cpython-312.pyc
CHANGED
|
Binary files a/src/scrapers/__pycache__/json_scraper.cpython-312.pyc and b/src/scrapers/__pycache__/json_scraper.cpython-312.pyc differ
|
|
|
src/scrapers/__pycache__/playwright_scraper.cpython-312.pyc
CHANGED
|
Binary files a/src/scrapers/__pycache__/playwright_scraper.cpython-312.pyc and b/src/scrapers/__pycache__/playwright_scraper.cpython-312.pyc differ
|
|
|
src/utils/__pycache__/__init__.cpython-312.pyc
CHANGED
|
Binary files a/src/utils/__pycache__/__init__.cpython-312.pyc and b/src/utils/__pycache__/__init__.cpython-312.pyc differ
|
|
|
src/utils/__pycache__/markdown_formatter.cpython-312.pyc
CHANGED
|
Binary files a/src/utils/__pycache__/markdown_formatter.cpython-312.pyc and b/src/utils/__pycache__/markdown_formatter.cpython-312.pyc differ
|
|
|
src/utils/__pycache__/proxy_manager.cpython-312.pyc
CHANGED
|
Binary files a/src/utils/__pycache__/proxy_manager.cpython-312.pyc and b/src/utils/__pycache__/proxy_manager.cpython-312.pyc differ
|
|
|