Spaces:

Starchik1
/

test

Sleeping

App Files Community

Starchik1 committed on Jun 15, 2024

Commit

1483245

verified ·

1 Parent(s): 6d5f4f4

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -43

app.py CHANGED Viewed

@@ -1,50 +1,53 @@
 import streamlit as st
 import requests
 from bs4 import BeautifulSoup
-from urllib.parse import urljoin
-# Функция для загрузки HTML-страницы
-def fetch_page(url):
-    try:
-        response = requests.get(url)
-        response.raise_for_status()  # Генерирует исключение для неправильных ответов
-        return response.text
-    except requests.exceptions.RequestException as e:
-        st.error(f"Ошибка при загрузке страницы: {e}")
-        return None
-# Функция для парсинга страницы и извлечения данных
 def parse_page(html):
-    try:
-        soup = BeautifulSoup(html, 'html.parser')
-        content = soup.find_all('div', class_='wap_view')
-        if not content:
-            return None
-        return content
-    except Exception as e:
-        st.error(f"Ошибка при парсинге страницы: {e}")
-        return None
-# Основная часть Streamlit приложения
 def main():
-    st.title('Superetka ETKA WAP Viewer')
-    url = "https://superetka.com/etka/wap.php"
-    html = fetch_page(url)
-    if html:
-        content = parse_page(html)
         if content:
-            for block in content:
-                for elem in block.find_all(['p', 'a', 'img']):
-                    if elem.name == 'a' and elem.get('href'):
-                        link_url = urljoin(url, elem.get('href'))
-                        st.markdown(f"[{elem.text}]({link_url})")
-                    elif elem.name == 'img' and elem.get('src'):
-                        img_url = urljoin(url, elem.get('src'))
-                        st.image(img_url, caption=elem.get('alt', ''))
-                    else:
-                        st.markdown(elem.prettify())
-# Запуск приложения
-if __name__ == "__main__":
     main()

 import streamlit as st
 import requests
 from bs4 import BeautifulSoup
+from PIL import Image
+from io import BytesIO
+# Функция для получения HTML-кода страницы
+def get_html(url):
+    response = requests.get(url)
+    return response.text
+# Функция для парсинга страницы с помощью BeautifulSoup
 def parse_page(html):
+    soup = BeautifulSoup(html, 'html.parser')
+    return soup
+# Основная функция для отображения информации
 def main():
+    st.title('Информация с сайта Superetka')
+    # URL для парсинга
+    url = 'https://superetka.com/etka/wap.php'
+    # Получаем HTML-код страницы
+    html = get_html(url)
+    # Парсим страницу
+    soup = parse_page(html)
+    # Ищем все заголовки
+    headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
+    # Выводим заголовки и их содержимое
+    for heading in headings:
+        st.header(heading.text.strip())
+        # Если есть изображение, выводим его
+        if heading.find_next('img'):
+            img_url = heading.find_next('img')['src']
+            img_response = requests.get(img_url)
+            img = Image.open(BytesIO(img_response.content))
+            st.image(img, caption='Изображение')
+        # Выводим текст под заголовком
+        content = heading.find_next_sibling()
         if content:
+            st.markdown(content.text.strip(), unsafe_allow_html=True)
+        st.write('---')  # Разделитель между блоками
+# Запускаем основную функцию
+if __name__ == '__main__':
     main()