rairo committed on
Commit
fdadbb0
·
verified ·
1 Parent(s): 45e9c8a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -21
app.py CHANGED
@@ -1,6 +1,10 @@
1
  import streamlit as st
2
  import asyncio
3
  from crawl4ai import AsyncWebCrawler
 
 
 
 
4
 
5
  st.set_page_config(page_title="Web Crawler App", layout="wide")
6
 
@@ -16,32 +20,35 @@ with st.expander("Advanced Options"):
16
  max_pages = st.number_input("Max Pages to Crawl", min_value=1, max_value=100, value=10)
17
 
18
  # Function to run the crawler
19
async def run_crawler(url, max_depth=1, timeout=30, max_pages=10):
    """Crawl *url* asynchronously and return the extracted content as Markdown.

    Args:
        url: Start page to crawl.
        max_depth: How many link levels to follow from the start page.
        timeout: Per-request timeout in seconds.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        The crawl result rendered as Markdown (str).
    """
    # The context manager owns the crawler's lifecycle (startup/teardown).
    async with AsyncWebCrawler() as crawler:
        crawl_result = await crawler.arun(
            url=url,
            max_depth=max_depth,
            timeout=timeout,
            max_pages=max_pages,
        )
        markdown_output = crawl_result.markdown
    return markdown_output
 
 
 
 
28
 
29
  # Button to start crawling
30
  if st.button("Start Crawling"):
31
- with st.spinner("Crawling in progress..."):
32
- # We need to run the async function in a way that works with Streamlit
33
- loop = asyncio.new_event_loop()
34
- asyncio.set_event_loop(loop)
35
- try:
36
- result = loop.run_until_complete(run_crawler(
37
  url=url,
38
  max_depth=max_depth,
39
  timeout=timeout,
40
  max_pages=max_pages
41
- ))
 
42
  # Display the results
43
  st.subheader("Crawl Results")
44
  st.markdown(result)
 
45
  # Option to download results
46
  st.download_button(
47
  label="Download Results",
@@ -49,11 +56,10 @@ if st.button("Start Crawling"):
49
  file_name="crawl_results.md",
50
  mime="text/markdown"
51
  )
52
- except Exception as e:
53
- st.error(f"An error occurred: {str(e)}")
54
- finally:
55
- loop.close()
56
 
57
  # Add footer with information
58
  st.markdown("---")
59
- st.markdown("This app uses the crawl4ai library to extract content from web pages.")
 
1
  import streamlit as st
2
  import asyncio
3
  from crawl4ai import AsyncWebCrawler
4
+ import nest_asyncio
5
+
6
+ # Apply nest_asyncio to allow nested event loops
7
+ nest_asyncio.apply()
8
 
9
  st.set_page_config(page_title="Web Crawler App", layout="wide")
10
 
 
20
  max_pages = st.number_input("Max Pages to Crawl", min_value=1, max_value=100, value=10)
21
 
22
  # Function to run the crawler
23
def run_async_crawler(url, max_depth=1, timeout=30, max_pages=10):
    """Synchronously crawl *url* and return the extracted content as Markdown.

    Wraps the async crawl in an event loop so it can be called from
    Streamlit's synchronous script flow (nest_asyncio is applied at module
    level to tolerate an already-running loop).

    Args:
        url: Start page to crawl.
        max_depth: How many link levels to follow from the start page.
        timeout: Per-request timeout in seconds.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        The crawl result rendered as Markdown (str).
    """
    async def _run():
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(
                url=url,
                max_depth=max_depth,
                timeout=timeout,
                max_pages=max_pages,
            )
            return result.markdown

    # asyncio.get_event_loop() is deprecated when no current loop is set and
    # raises RuntimeError in non-main threads (Streamlit runs scripts in a
    # worker thread), so fall back to creating and installing a fresh loop.
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(_run())
36
 
37
  # Button to start crawling
38
  if st.button("Start Crawling"):
39
+ try:
40
+ with st.spinner("Crawling in progress..."):
41
+ result = run_async_crawler(
 
 
 
42
  url=url,
43
  max_depth=max_depth,
44
  timeout=timeout,
45
  max_pages=max_pages
46
+ )
47
+
48
  # Display the results
49
  st.subheader("Crawl Results")
50
  st.markdown(result)
51
+
52
  # Option to download results
53
  st.download_button(
54
  label="Download Results",
 
56
  file_name="crawl_results.md",
57
  mime="text/markdown"
58
  )
59
+ except Exception as e:
60
+ st.error(f"An error occurred: {str(e)}")
61
+ st.error("If you're seeing browser launch errors, make sure you have the required dependencies installed.")
 
62
 
63
  # Add footer with information
64
  st.markdown("---")
65
+ st.info("This app uses the crawl4ai library to extract content from web pages. The crawler may require additional dependencies if it's using a headless browser.")