Spaces:
Build error
Build error
| import requests | |
| from bs4 import BeautifulSoup | |
| try: | |
| from ..character import set_website_content | |
| except: | |
| from gpt_computer_agent.character import set_website_content | |
def train(url: str) -> bool:
    """Fetch a web page and store a text summary of it as the website content.

    Downloads ``url``, collects a fixed set of Open Graph ``<meta>``
    properties, the ``.string`` of the first ``<title>``, ``<h1>`` and ``<p>``
    tags, the page text returned by ``get_text`` and the URL itself. The
    collected items are formatted as ``key: value`` lines and handed to
    ``set_website_content``.

    Args:
        url: Address of the page to fetch.

    Returns:
        ``True`` on success. On any failure (network error, HTTP error
        status, parsing problem, ...) the caught exception object is returned
        instead of being raised. An exception instance is truthy, so callers
        must compare the result against ``True`` rather than test its
        truthiness.
    """
    # Open Graph properties copied into the summary when the page has them.
    meta_properties = (
        "og:description",
        "og:site_name",
        "og:title",
        "og:type",
        "og:url",
    )

    try:
        # A timeout keeps an unresponsive server from blocking the caller
        # forever; raise_for_status avoids storing an HTTP error page as
        # the website content.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, "html.parser")

        data = {}
        for tag in soup.find_all("meta"):
            prop = tag.get("property")
            if prop in meta_properties:
                data[prop] = tag.get("content")

        # Pages are not guaranteed to contain these tags; a missing tag is
        # recorded as None instead of aborting the whole run with an
        # AttributeError.
        for tag_name in ("title", "h1", "p"):
            data[tag_name] = getattr(soup.find(tag_name), "string", None)

        data["text"] = soup.get_text(separator="\n", strip=True)
        data["url"] = url

        # One "key: value" line per collected item.
        the_string = "".join(f"{key}: {value}\n" for key, value in data.items())
        set_website_content(the_string)
        return True
    except Exception as e:
        # Kept for backward compatibility: failures are reported by returning
        # the exception, not by raising it.
        return e