Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ import re
|
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
| 5 |
import pandas as pd
|
| 6 |
-
from bs4 import BeautifulSoup
|
| 7 |
|
| 8 |
try:
|
| 9 |
from dotenv import load_dotenv
|
|
@@ -42,10 +41,18 @@ def tool_web_page_view(url: str) -> str:
|
|
| 42 |
headers = {"User-Agent": "Mozilla/5.0 (compatible; ReActAgent/1.0)"}
|
| 43 |
r = requests.get(url, timeout=15, headers=headers)
|
| 44 |
r.raise_for_status()
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
return text[:8000] if len(text) > 8000 else text or "No text content found."
|
| 50 |
except Exception as e:
|
| 51 |
return f"Web page view error: {e}"
|
|
|
|
| 3 |
import gradio as gr
|
| 4 |
import requests
|
| 5 |
import pandas as pd
|
|
|
|
| 6 |
|
| 7 |
try:
|
| 8 |
from dotenv import load_dotenv
|
|
|
|
def tool_web_page_view(url: str) -> str:
    """Fetch *url* and return its readable text content, truncated to 8000 chars.

    Prefers BeautifulSoup for real HTML parsing (stripping script/style and
    common boilerplate containers); falls back to a crude regex tag-stripper
    when bs4 is not installed. Any failure — network error, non-2xx status,
    parse error — is returned as an error *string* rather than raised, so the
    calling agent loop never crashes on a bad page.

    Args:
        url: Absolute HTTP(S) URL of the page to fetch.

    Returns:
        Extracted page text (at most 8000 characters), the placeholder
        "No text content found." for empty pages, or an error description
        prefixed with "Web page view error:".
    """
    try:
        # Some sites reject clients without a browser-like User-Agent.
        headers = {"User-Agent": "Mozilla/5.0 (compatible; ReActAgent/1.0)"}
        r = requests.get(url, timeout=15, headers=headers)
        r.raise_for_status()
        html = r.text
        try:
            # Imported lazily so the tool degrades gracefully without bs4.
            from bs4 import BeautifulSoup

            soup = BeautifulSoup(html, "html.parser")
            # Drop elements that rarely carry useful article content.
            for tag in soup(["script", "style", "nav", "footer", "header"]):
                tag.decompose()
            text = soup.get_text(separator="\n", strip=True)
        except ImportError:
            # Fallback: strip scripts/styles, then all remaining tags,
            # then collapse whitespace.
            text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.DOTALL | re.IGNORECASE)
            text = re.sub(r"<[^>]+>", " ", text)
            text = re.sub(r"\s+", " ", text).strip()
        # An empty string slices to "" and falls through `or` to the
        # placeholder — equivalent to the old length-conditional, but clearer.
        return text[:8000] or "No text content found."
    except Exception as e:
        # Deliberate best-effort boundary: report, never raise, so the
        # agent can react to the failure text.
        return f"Web page view error: {e}"
|