Update app.py

app.py CHANGED
@@ -1,6 +1,10 @@
 import streamlit as st
 import asyncio
 from crawl4ai import AsyncWebCrawler
+import nest_asyncio
+
+# Apply nest_asyncio to allow nested event loops
+nest_asyncio.apply()
 
 st.set_page_config(page_title="Web Crawler App", layout="wide")
 
@@ -16,32 +20,35 @@ with st.expander("Advanced Options"):
     max_pages = st.number_input("Max Pages to Crawl", min_value=1, max_value=100, value=10)
 
 # Function to run the crawler
-async def run_crawler(…):
-    …
+def run_async_crawler(url, max_depth=1, timeout=30, max_pages=10):
+    async def _run():
+        async with AsyncWebCrawler() as crawler:
+            result = await crawler.arun(
+                url=url,
+                max_depth=max_depth,
+                timeout=timeout,
+                max_pages=max_pages
+            )
+            return result.markdown
+
+    # Use the current event loop with nest_asyncio applied
+    return asyncio.get_event_loop().run_until_complete(_run())
 
 # Button to start crawling
 if st.button("Start Crawling"):
-    …
-    asyncio.set_event_loop(loop)
-    try:
-        result = loop.run_until_complete(run_crawler(
+    try:
+        with st.spinner("Crawling in progress..."):
+            result = run_async_crawler(
                 url=url,
                 max_depth=max_depth,
                 timeout=timeout,
                 max_pages=max_pages
-        ))
+            )
+
         # Display the results
         st.subheader("Crawl Results")
         st.markdown(result)
+
         # Option to download results
         st.download_button(
             label="Download Results",
@@ -49,11 +56,10 @@ if st.button("Start Crawling"):
             file_name="crawl_results.md",
             mime="text/markdown"
         )
-    …
-    loop.close()
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+        st.error("If you're seeing browser launch errors, make sure you have the required dependencies installed.")
 
 # Add footer with information
 st.markdown("---")
-st.…
+st.info("This app uses the crawl4ai library to extract content from web pages. The crawler may require additional dependencies if it's using a headless browser.")
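For context on the change above: a stock asyncio event loop is not reentrant, so calling `loop.run_until_complete()` while that loop is already running raises `RuntimeError: This event loop is already running`, which is easy to trigger from inside a Streamlit script. `nest_asyncio.apply()` patches the loop (and `asyncio.run`) to tolerate that nesting. A minimal sketch of the behavior, separate from the app code; it assumes the `nest-asyncio` package is installed:

```python
# Minimal sketch of the reentrancy problem this commit works around.
import asyncio
import nest_asyncio

nest_asyncio.apply()  # patch event loops so run_until_complete() can nest

async def inner():
    return "done"

async def outer():
    # We are already inside a running loop here; without nest_asyncio.apply()
    # the next line raises "RuntimeError: This event loop is already running".
    return asyncio.get_event_loop().run_until_complete(inner())

print(asyncio.run(outer()))  # prints: done
```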
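The new `run_async_crawler` is the standard sync-over-async wrapper: define an inner coroutine, then drive it to completion on an event loop so plain Streamlit code can call it. A hedged standalone variant is below; it keeps only the `url` argument to `arun()` (the other kwargs in the diff are app-specific), and the `RuntimeError` fallback for loop-less threads is an assumption, not something this commit needs once `nest_asyncio.apply()` has run:

```python
# Hedged sketch: the diff's sync-over-async pattern as a reusable helper.
import asyncio
from crawl4ai import AsyncWebCrawler

def crawl_sync(url: str) -> str:
    async def _run() -> str:
        async with AsyncWebCrawler() as crawler:
            result = await crawler.arun(url=url)
            return result.markdown  # markdown output, as the app displays it

    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # Assumption: threads without a running loop (e.g. worker threads)
        # need one created on demand.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(_run())

if __name__ == "__main__":
    print(crawl_sync("https://example.com")[:300])
```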
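On the new error message about browser launch failures: crawl4ai typically drives a headless browser through Playwright. Assuming that is the backend in use here (the diff itself does not confirm it), the usual cause is missing browser binaries, fixed by running `playwright install` after installing the Python packages. A hedged preflight check along those lines:

```python
# Hedged sketch: preflight check for the "browser launch errors" the new
# except-branch warns about. Assumes a Playwright backend, which this diff
# does not confirm.
from playwright.sync_api import sync_playwright, Error as PlaywrightError

def chromium_available() -> bool:
    try:
        with sync_playwright() as p:
            p.chromium.launch(headless=True).close()
        return True
    except PlaywrightError:
        # Typically fixed with: playwright install chromium
        return False
```

If the check returns False, the app could surface that hint with `st.error` before starting a crawl instead of failing mid-run.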