Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -48,9 +48,12 @@ def llava(message, history):
|
|
| 48 |
|
| 49 |
def extract_text_from_webpage(html_content):
    """Parse HTML and remove its script, style, header and footer elements.

    Args:
        html_content: HTML markup to parse with BeautifulSoup's
            ``html.parser``.

    Returns:
        None. NOTE(review): this version has no ``return`` statement, so the
        pruned tree is discarded when the function exits.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # The original `for` line was missing its closing ")" and ":", which made
    # the whole module fail to compile.
    for tag in soup(["script", "style", "header", "footer"]):
        tag.extract()
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
|
| 56 |
def search(query):
|
|
|
|
| 48 |
|
| 49 |
def extract_text_from_webpage(html_content):
    """Return the text of an HTML document, capped in length.

    The markup is parsed with BeautifulSoup's ``html.parser``; ``script``,
    ``style``, ``header`` and ``footer`` elements are removed from the tree
    before the remaining text is collected.

    Args:
        html_content: HTML markup to parse.

    Returns:
        The extracted text, cut to at most ``max_chars_per_page`` characters.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    # The original `for` line was missing the closing ")" of the soup(...)
    # call, which made the module fail to compile.
    for tag in soup(["script", "style", "header", "footer"]):
        tag.extract()
    visible_text = soup.get_text(strip=True)
    # NOTE(review): max_chars_per_page is not defined in this function; it is
    # assumed to be a module-level setting -- confirm it exists at that scope.
    # The original additionally required the text to end with "..." before
    # truncating, so over-long pages were almost never shortened.
    if len(visible_text) > max_chars_per_page:
        visible_text = visible_text[:max_chars_per_page]
    return visible_text
|
| 57 |
|
| 58 |
|
| 59 |
def search(query):
|