DataSetGo

Sleeping

App Files Community

acecalisto3 committed on Oct 31, 2024

Commit

c4c5e82

verified ·

1 Parent(s): ef30ef7

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -274

app.py CHANGED Viewed

@@ -1,30 +1,30 @@
-import streamlit as st
-import requests
 import os
 import urllib
-import base64
-from bs4 import BeautifulSoup
 import hashlib
-import json
-import uuid
 import logging
-from typing import Optional, Dict, List, Any
-from pathlib import Path
-from shot_scraper import shotscraper  # importing shot-scraper
-import feedgenerator  # Import RSS feed generator
-# set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# constants
-excluded_files = [
     'app.py', 'requirements.txt', 'pre-requirements.txt',
     'packages.txt', 'readme.md', '.gitattributes',
     "backup.py", "dockerfile"
 ]
-urls = {
     "chordify - play along chords": "https://chordify.net/",
     "national guitar academy - guitar learning": "https://www.guitaracademy.com/",
     "ultimate guitar - massive song database": "https://www.ultimate-guitar.com/",
@@ -38,146 +38,102 @@ urls = {
     "john lennon": "https://www.ultimate-guitar.com/search.php?search_type=title&value=john%20lennon",
 }
-def initialize_history() -> None:
-    """Initialize history.json if it doesn't exist."""
-    if not os.path.exists("history.json"):
-        with open("history.json", "w") as f:
-            json.dump({}, f)
-def download_file(url: str, local_filename: str) -> Optional[str]:
-    """
-    Download a file from a URL to a local file.
-    Args:
-        url (str): The URL to download from
-        local_filename (str): The local file path to save to
-    Returns:
-        Optional[str]: The local filename if successful, None otherwise
-    """
-    if url.startswith(('http://', 'https://')):
-        try:
-            with requests.get(url, stream=True) as r:
-                r.raise_for_status()
-                with open(local_filename, 'wb') as f:
-                    for chunk in r.iter_content(chunk_size=8192):
-                        f.write(chunk)
-            return local_filename
-        except requests.exceptions.HTTPError as err:
-            logger.error(f"HTTP error occurred: {err}")
-            return None
-    return None
 def download_html_and_files(url: str, subdir: str) -> None:
-    """
-    Download HTML/XML content and associated files from a URL.
-    Args:
-        url (str): The URL to download content from
-        subdir (str): The subdirectory to save files to
-    """
     try:
         os.makedirs(subdir, exist_ok=True)
         response = requests.get(url, timeout=30)
         response.raise_for_status()
         content = response.text
-        # Determine if content is XML or HTML
-        is_xml = url.endswith('.xml') or '<rss' in content[:1000] or '<?xml' in content[:1000]
-        try:
-            if is_xml:
-                soup = BeautifulSoup(content, 'xml')  # Use XML parser for XML content
-                st.info("Processing XML content...")
-            else:
-                soup = BeautifulSoup(content, 'html.parser')
-                st.info("Processing HTML content...")
-        except Exception as e:
-            # Try alternative parser if first attempt fails
-            try:
-                soup = BeautifulSoup(content, 'lxml')
-                st.info("Using alternative parser (lxml)...")
-            except Exception as inner_e:
-                logger.error(f"Failed to parse content: {e}, {inner_e}")
-                st.error(f"Failed to parse content from {url}")
-                return
         base_url = urllib.parse.urlunparse(
             urllib.parse.urlparse(url)._replace(
                 path='', params='', query='', fragment=''
             )
         )
-        # Handle links differently for XML and HTML
-        if is_xml:
-            # For XML, look for specific tags that might contain links
-            link_tags = (
-                soup.find_all('link') +
-                soup.find_all('url') +
-                soup.find_all('enclosure') +
-                soup.find_all('media:content')
-            )
-            for link in link_tags:
-                try:
-                    # Get URL from appropriate attribute
-                    href = (
-                        link.get('href') or
-                        link.get('url') or
-                        link.get('src') or
-                        link.text.strip()
-                    )
-                    if href and (href.startswith('http://') or href.startswith('https://')):
-                        file_url = href
-                        local_filename = os.path.join(
-                            subdir,
-                            urllib.parse.urlparse(file_url).path.split('/')[-1]
-                        )
-                        if local_filename and not local_filename.endswith('/'):
-                            download_file(file_url, local_filename)
-                except Exception as e:
-                    logger.error(f"Failed to process XML link: {e}")
-                    continue
-        else:
-            # Original HTML processing
-            for link in soup.find_all('a'):
-                href = link.get('href')
-                if not href:
-                    continue
                 try:
                     file_url = urllib.parse.urljoin(base_url, href)
                     local_filename = os.path.join(
-                        subdir,
                         urllib.parse.urlparse(file_url).path.split('/')[-1]
                     )
-                    if not local_filename or local_filename.endswith('/'):
-                        continue
                     if local_filename != subdir:
                         link['href'] = local_filename
                         download_file(file_url, local_filename)
                 except Exception as e:
                     logger.error(f"Failed to process HTML link {href}: {e}")
-                    continue
-        # Save the processed content
-        try:
-            output_filename = "feed.xml" if is_xml else "index.html"
-            with open(os.path.join(subdir, output_filename), "w", encoding='utf-8') as file:
-                file.write(str(soup))
-            st.success(f"Content saved as {output_filename}")
-        except Exception as e:
-            logger.error(f"Failed to save content file: {e}")
-            st.error("Failed to save downloaded content")
     except requests.exceptions.RequestException as e:
         logger.error(f"Failed to download content from {url}: {e}")
         st.error(f"Failed to download content from {url}")
@@ -185,166 +141,99 @@ def download_html_and_files(url: str, subdir: str) -> None:
         logger.error(f"Unexpected error while downloading content: {e}")
         st.error("An unexpected error occurred while downloading content")
-def list_files(directory_path: str = '.') -> List[str]:
-    """List all files in directory excluding EXCLUDED_FILES."""
-    files = [f for f in os.listdir(directory_path)
-             if os.path.isfile(os.path.join(directory_path, f))]
-    return [f for f in files if f not in EXCLUDED_FILES]
-def file_editor(file_path: str) -> None:
-    """Edit file content using Streamlit text area."""
-    st.write(f"Editing File: {os.path.basename(file_path)}")
-    try:
-        with open(file_path, "r", encoding='utf-8') as f:
-            file_content = f.read()
-    except Exception as e:
-        logger.error(f"Failed to read file {file_path}: {e}")
-        st.error("Failed to read file")
-        return
-    edited_content = st.text_area(
-        "Edit the file content:",
-        value=file_content,
-        height=250
-    )
-    if st.button("💾 Save"):
-        try:
-            with open(file_path, "w", encoding='utf-8') as f:
-                f.write(edited_content)
-            st.success(f"File '{os.path.basename(file_path)}' saved!")
-        except Exception as e:
-            logger.error(f"Failed to save file {file_path}: {e}")
-            st.error("Failed to save file")
-def show_file_operations(file_path: str, sequence_number: int) -> None:
-    """Show file operations UI for a given file."""
-    unique_key = hashlib.md5(file_path.encode()).hexdigest()
-    file_content = ""
-    col01, col02, col1, col2, col3 = st.columns(5)
-    with col01:
-        st.write(os.path.basename(file_path))
-    with col1:
-        edit_key = f"edit_{unique_key}_{sequence_number}"
-        if st.button("✏️ Edit", key=edit_key):
-            try:
-                with open(file_path, "r", encoding='utf-8') as f:
-                    file_content = f.read()
-                text_area_key = f"text_area_{unique_key}_{sequence_number}"
-                file_content = st.text_area(
-                    "Edit the file content:",
-                    value=file_content,
-                    height=250,
-                    key=text_area_key
-                )
-            except Exception as e:
-                logger.error(f"Failed to read file {file_path}: {e}")
-                st.error("Failed to read file")
-    with col2:
-        save_key = f"save_{unique_key}_{sequence_number}"
-        if st.button("💾 Save", key=save_key):
-            if file_content:
-                try:
-                    with open(file_path, "w", encoding='utf-8') as f:
-                        f.write(file_content)
-                    st.success("File saved!")
-                except Exception as e:
-                    logger.error(f"Failed to save file {file_path}: {e}")
-                    st.error("Failed to save file")
-    with col3:
-        delete_key = f"delete_{unique_key}_{sequence_number}"
-        if st.button("🗑️ Delete", key=delete_key):
-            try:
-                os.remove(file_path)
-                st.success("File deleted!")
-            except Exception as e:
-                logger.error(f"Failed to delete file {file_path}: {e}")
-                st.error("Failed to delete file")
-def get_download_link(file: str) -> str:
-    """Generate a download link for a file."""
-    try:
-        with open(file, "rb") as f:
-            bytes_content = f.read()
-            b64 = base64.b64encode(bytes_content).decode()
-            filename = os.path.basename(file)
-            return f'<a href="data:file/octet-stream;base64,{b64}" download=\'{filename}\'>Download: {filename}</a>'
-    except Exception as e:
-        logger.error(f"Failed to create download link for {file}: {e}")
-        return f"Failed to create download link for {os.path.basename(file)}"
 def show_download_links(subdir: str) -> None:
-    """Show download links for all files in a directory."""
-    global file_sequence_numbers
-    if not hasattr(show_download_links, 'file_sequence_numbers'):
-        show_download_links.file_sequence_numbers = {}
-    for file in list_files(subdir):
         file_path = os.path.join(subdir, file)
-        if file_path not in show_download_links.file_sequence_numbers:
-            show_download_links.file_sequence_numbers[file_path] = 1
-        else:
-            show_download_links.file_sequence_numbers[file_path] += 1
-        sequence_number = show_download_links.file_sequence_numbers[file_path]
         if os.path.isfile(file_path):
             st.markdown(get_download_link(file_path), unsafe_allow_html=True)
-            show_file_operations(file_path, sequence_number)
-        else:
-            st.write(f"File not found: {file}")
-# Generate RSS feed
-def generate_rss_feed():
-    feed = feedgenerator.Rss201rev2Feed(
-        title="Infinite Dataset Hub Updates",
-        link="https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub",
-        description="Latest updates from the Infinite Dataset Hub",
-        language="en"
-    )
-    for i, line in enumerate(urls):
-        dataset_name = line
-        feed.add_item(
-            title=dataset_name,
-            link=urls[dataset_name],
-            description=f"Link to {dataset_name}",
-            pubdate=time.gmtime(time.time() - 86400 * i)
         )
-    return feed.writeString('utf-8')
-def main() -> None:
-    """app.py"""
-    st.title("RSS Feed and Content Downloader")
-    # Initialize history
-    initialize_history()
-    # RSS Feed Section
     st.header("RSS Feed")
     if st.button("Generate RSS Feed"):
-        rss_feed = generate_rss_feed()
         st.success("RSS Feed generated successfully!")
-        st.markdown(rss_feed, unsafe_allow_html=True)
-    # Content Downloader Section
     st.header("Content Downloader")
-    selected_url = st.selectbox("Select a URL to download content from:", list(urls.keys()))
     subdir = st.text_input("Enter subdirectory name to save files:", "downloads")
     if st.button("Download Content"):
-        download_html_and_files(urls[selected_url], subdir)
         st.success("Content downloaded successfully!")
         show_download_links(subdir)
 if __name__ == "__main__":
-    main()

 import os
+import json
+import requests
 import urllib
 import hashlib
+import base64
 import logging
+import streamlit as st
+from bs4 import BeautifulSoup
+from typing import Optional, List
+import feedgenerator
+import time
+from streamlit_option_menu import option_menu
+# Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Constants
+EXCLUDED_FILES = [
     'app.py', 'requirements.txt', 'pre-requirements.txt',
     'packages.txt', 'readme.md', '.gitattributes',
     "backup.py", "dockerfile"
 ]
+URLS = {
     "chordify - play along chords": "https://chordify.net/",
     "national guitar academy - guitar learning": "https://www.guitaracademy.com/",
     "ultimate guitar - massive song database": "https://www.ultimate-guitar.com/",
     "john lennon": "https://www.ultimate-guitar.com/search.php?search_type=title&value=john%20lennon",
 }
+# Function to toggle dark mode
+def toggle_dark_mode():
+    """Inject the page-wide colour theme chosen by ``st.session_state.dark_mode``.
+
+    Seeds the ``dark_mode`` flag with ``False`` when it is not yet present in
+    the session state, then writes one ``<style>`` block that recolours
+    ``.stApp`` (dark or light) through ``st.markdown``.
+    """
+    if 'dark_mode' not in st.session_state:
+        st.session_state.dark_mode = False
+    dark_theme = '''
+        <style>
+        .stApp {
+            background-color: #2b2b2b;
+            color: #ffffff;
+        }
+        </style>
+        '''
+    light_theme = '''
+        <style>
+        .stApp {
+            background-color: #ffffff;
+            color: #000000;
+        }
+        </style>
+        '''
+    # Exactly one of the two stylesheets is emitted per call.
+    theme_css = dark_theme if st.session_state.dark_mode else light_theme
+    st.markdown(theme_css, unsafe_allow_html=True)
+# Generate RSS feed
+def generate_rss_feed():
+    """Build an RSS 2.0 document with one item per entry in ``URLS``.
+
+    Each item uses the ``URLS`` key as its title and the mapped URL as its
+    link. Publication dates are spaced one day apart, counting back from the
+    current UTC time in ``URLS`` iteration order.
+
+    Returns:
+        The feed as produced by ``feed.writeString('utf-8')``.
+    """
+    # Local import so this function brings its own dependency into scope.
+    import datetime
+
+    feed = feedgenerator.Rss201rev2Feed(
+        title="Infinite Dataset Hub Updates",
+        link="https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub",
+        description="Latest updates from the Infinite Dataset Hub",
+        language="en"
+    )
+    now = datetime.datetime.now(datetime.timezone.utc)
+    for age_in_days, (dataset_name, dataset_url) in enumerate(URLS.items()):
+        feed.add_item(
+            title=dataset_name,
+            link=dataset_url,
+            description=f"Link to {dataset_name}",
+            # feedgenerator formats and compares pubdates as datetime objects;
+            # the struct_time returned by time.gmtime() is not one.
+            pubdate=now - datetime.timedelta(days=age_in_days),
+        )
+    return feed.writeString('utf-8')
+# Download file
+def download_file(url: str, local_filename: str) -> Optional[str]:
+    """Stream the resource at ``url`` into ``local_filename``.
+
+    The response body is written in 8192-byte chunks so it is never held in
+    memory as a whole.
+
+    Args:
+        url: Address to fetch with ``requests.get``.
+        local_filename: Path of the file to create or overwrite.
+
+    Returns:
+        ``local_filename`` on success, ``None`` when the request or the local
+        write fails (the failure is logged).
+    """
+    try:
+        # Without a timeout requests waits indefinitely on a stalled server,
+        # which would freeze the whole download loop.
+        with requests.get(url, stream=True, timeout=30) as r:
+            r.raise_for_status()
+            with open(local_filename, 'wb') as f:
+                for chunk in r.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        logger.info(f"File downloaded successfully: {local_filename}")
+        return local_filename
+    except (requests.exceptions.RequestException, OSError) as err:
+        # OSError covers an unwritable target path, e.g. one that names a
+        # directory, so the "None on failure" contract also holds for disk errors.
+        logger.error(f"Error occurred while downloading {url}: {err}")
+        return None
+# Download HTML and files
 def download_html_and_files(url: str, subdir: str) -> None:
     try:
         os.makedirs(subdir, exist_ok=True)
         response = requests.get(url, timeout=30)
         response.raise_for_status()
         content = response.text
+        soup = BeautifulSoup(content, 'html.parser')
         base_url = urllib.parse.urlunparse(
             urllib.parse.urlparse(url)._replace(
                 path='', params='', query='', fragment=''
             )
         )
+        progress_bar = st.progress(0)
+        total_links = len(soup.find_all('a'))
+        for i, link in enumerate(soup.find_all('a')):
+            href = link.get('href')
+            if href:
                 try:
                     file_url = urllib.parse.urljoin(base_url, href)
                     local_filename = os.path.join(
+                        subdir,
                         urllib.parse.urlparse(file_url).path.split('/')[-1]
                     )
                     if local_filename != subdir:
                         link['href'] = local_filename
                         download_file(file_url, local_filename)
                 except Exception as e:
                     logger.error(f"Failed to process HTML link {href}: {e}")
+            progress_bar.progress((i + 1) / total_links)
+        with open(os.path.join(subdir, "index.html"), "w", encoding='utf-8') as file:
+            file.write(str(soup))
+        st.success("Content saved as index.html")
     except requests.exceptions.RequestException as e:
         logger.error(f"Failed to download content from {url}: {e}")
         st.error(f"Failed to download content from {url}")
         logger.error(f"Unexpected error while downloading content: {e}")
         st.error("An unexpected error occurred while downloading content")
+# Show download links
 def show_download_links(subdir: str) -> None:
+    """Render a download link for every regular file directly inside ``subdir``.
+
+    Entries that are not files (for example sub-directories) are skipped.
+    Each link is the HTML returned by ``get_download_link`` and is passed to
+    ``st.markdown`` with ``unsafe_allow_html=True``.
+    """
+    for file in os.listdir(subdir):
         file_path = os.path.join(subdir, file)
         if os.path.isfile(file_path):
             st.markdown(get_download_link(file_path), unsafe_allow_html=True)
+# Get download link
+def get_download_link(file: str) -> str:
+    """Return an HTML anchor that embeds ``file`` as a base64 ``data:`` URI.
+
+    Args:
+        file: Path of the file to read; its whole content is loaded in memory.
+
+    Returns:
+        An ``<a>`` element whose ``download`` attribute and label are the
+        HTML-escaped base name of ``file``.
+
+    Raises:
+        OSError: If ``file`` cannot be opened or read.
+    """
+    # Local import so this function brings its own dependency into scope.
+    import html
+
+    with open(file, "rb") as f:
+        b64 = base64.b64encode(f.read()).decode()
+    # File names are derived from scraped link targets and the result is
+    # rendered as raw HTML, so the name must not be able to break out of the
+    # attribute or inject markup.
+    filename = html.escape(os.path.basename(file), quote=True)
+    return f'<a href="data:file/octet-stream;base64,{b64}" download="{filename}">Download: {filename}</a>'
+# Show file browser
+def show_file_browser():
+    """Write the directory tree below ``downloads`` as indented text lines.
+
+    Shows a warning instead when the ``downloads`` directory does not exist.
+    Directories are written with a trailing ``/``; files are indented four
+    spaces deeper than the directory that contains them.
+    """
+    st.write("File Browser")
+    root_dir = "downloads"
+    if os.path.exists(root_dir):
+        for current, _subdirs, filenames in os.walk(root_dir):
+            # Depth = number of path separators left after removing the root name.
+            depth = current.replace(root_dir, '').count(os.sep)
+            dir_pad = ' ' * 4 * depth
+            st.write(f"{dir_pad}{os.path.basename(current)}/")
+            file_pad = ' ' * 4 * (depth + 1)
+            for name in filenames:
+                st.write(f"{file_pad}{name}")
+    else:
+        st.warning("No downloads available. Use the Content Downloader to download files.")
+# Main function
+def main():
+    """Configure the page, draw the sidebar and render the selected section.
+
+    The sidebar offers three pages ("RSS Feed", "Content Downloader",
+    "File Manager") through ``option_menu`` plus a "Dark Mode" checkbox bound
+    to the ``dark_mode`` session-state key.
+    """
+    st.set_page_config(page_title="RSS Feed and Content Downloader", layout="wide")
+    # Toggle dark mode
+    toggle_dark_mode()
+    # Sidebar
+    with st.sidebar:
+        st.title("Navigation")
+        selected = option_menu(
+            menu_title=None,
+            options=["RSS Feed", "Content Downloader", "File Manager"],
+            icons=["rss", "cloud-download", "folder"],
+            menu_icon="cast",
+            default_index=0,
         )
+        # Dark mode toggle
+        st.checkbox("Dark Mode", key="dark_mode", on_change=toggle_dark_mode)
+    # Main content
+    if selected == "RSS Feed":
+        rss_feed_section()
+    elif selected == "Content Downloader":
+        content_downloader_section()
+    elif selected == "File Manager":
+        file_manager_section()
+# RSS Feed Section
+def rss_feed_section():
+    """Render the "RSS Feed" page.
+
+    When "Generate RSS Feed" is pressed, builds the feed with
+    ``generate_rss_feed``, shows it as XML source and offers it for download
+    as ``rss_feed.xml``.
+    """
     st.header("RSS Feed")
     if st.button("Generate RSS Feed"):
+        with st.spinner("Generating RSS Feed..."):
+            rss_feed = generate_rss_feed()
         st.success("RSS Feed generated successfully!")
+        st.code(rss_feed, language="xml")
+        # Option to export RSS feed as XML file
+        st.download_button(
+            label="Download RSS Feed",
+            data=rss_feed,
+            file_name="rss_feed.xml",
+            mime="application/xml"
+        )
+# Content Downloader Section
+def content_downloader_section():
+    """Render the "Content Downloader" page.
+
+    Lets the user pick one of the ``URLS`` entries and a target subdirectory
+    (default ``downloads``). Pressing "Download Content" calls
+    ``download_html_and_files`` for the chosen URL and then lists the files in
+    that subdirectory through ``show_download_links``.
+    """
     st.header("Content Downloader")
+    selected_url = st.selectbox("Select a URL to download content from:", list(URLS.keys()))
     subdir = st.text_input("Enter subdirectory name to save files:", "downloads")
     if st.button("Download Content"):
+        with st.spinner("Downloading content..."):
+            download_html_and_files(URLS[selected_url], subdir)
+        # NOTE(review): shown unconditionally. download_html_and_files returns
+        # None and reports request failures itself via st.error, so this
+        # banner can follow an error message.
         st.success("Content downloaded successfully!")
         show_download_links(subdir)
+# File Manager Section
+def file_manager_section():
+    """Render the "File Manager" page: a header followed by the file browser."""
+    st.header("File Manager")
+    show_file_browser()
 if __name__ == "__main__":
+    main()