Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -130,6 +130,21 @@ def extract_background_from_url(url: str):
|
|
| 130 |
background_section = extract_background(content)
|
| 131 |
return {"background": background_section}
|
| 132 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
def search_web(topic, max_references):
|
| 134 |
"""Search the web using DuckDuckGo and return results."""
|
| 135 |
doc_list = []
|
|
|
|
| 130 |
background_section = extract_background(content)
|
| 131 |
return {"background": background_section}
|
| 132 |
|
| 133 |
+
|
| 134 |
+
@app.get("/extract-page-text/")
def extract_page_content(url: str, max_char: int = 5000):
    """Fetch a web page and return the leading part of its text content.

    Args:
        url: Address of the page to download.
        max_char: Maximum number of characters of extracted text to return.
            Negative values are treated as 0 so they cannot trim characters
            from the end of the text via negative slicing.

    Returns:
        ``{"text_content": <text>}`` on success, or ``{"error": <message>}``
        when the request fails (connection error, timeout, or an HTTP error
        status raised by ``raise_for_status``).
    """
    try:
        # NOTE(review): verify=False disables TLS certificate verification and
        # the URL is caller-supplied. Kept to preserve existing behaviour;
        # confirm whether this is intentional before exposing the endpoint.
        response = requests.get(url, timeout=20, verify=False)
        response.raise_for_status()
    except requests.RequestException as e:
        return {"error": f"Error fetching the page: {str(e)}"}

    soup = BeautifulSoup(response.text, 'html.parser')
    full_text = soup.get_text()

    # Clamp the limit so a negative max_char yields an empty string rather
    # than "everything except the last N characters".
    text = full_text[:max(max_char, 0)]
    return {"text_content": text}
|
| 147 |
+
|
| 148 |
def search_web(topic, max_references):
|
| 149 |
"""Search the web using DuckDuckGo and return results."""
|
| 150 |
doc_list = []
|