acecalisto3 committed on
Commit
31b4ef8
·
verified ·
1 Parent(s): 7a8082e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +251 -154
app.py CHANGED
@@ -1,17 +1,24 @@
1
-
2
- import os
3
- import json
4
  import requests
 
5
  import urllib
6
- import hashlib
7
  import base64
8
- import logging
9
- import streamlit as st
10
  from bs4 import BeautifulSoup
11
- from typing import Optional, List
12
- import feedgenerator
 
 
 
 
 
13
  import time
14
- from streamlit_option_menu import option_menu
 
 
 
 
 
 
15
 
16
  # Set up logging
17
  logging.basicConfig(level=logging.INFO)
@@ -19,9 +26,7 @@ logger = logging.getLogger(__name__)
19
 
20
  # Constants
21
  EXCLUDED_FILES = [
22
- 'app.py', 'requirements.txt', 'pre-requirements.txt',
23
- 'packages.txt', 'readme.md', '.gitattributes',
24
- "backup.py", "dockerfile"
25
  ]
26
 
27
  URLS = {
@@ -38,85 +43,116 @@ URLS = {
38
  "john lennon": "https://www.ultimate-guitar.com/search.php?search_type=title&value=john%20lennon",
39
  }
40
 
41
- # Function to toggle dark mode
42
def toggle_dark_mode():
    """Emit the CSS override matching the current ``dark_mode`` setting.

    ``st.session_state.dark_mode`` is created as ``False`` when it does not
    exist yet; afterwards one ``<style>`` block targeting ``.stApp`` is
    written with ``st.markdown``: dark colours when the flag is truthy,
    light colours otherwise.
    """
    if 'dark_mode' not in st.session_state:
        st.session_state.dark_mode = False

    dark_css = '''
        <style>
        .stApp {
            background-color: #2b2b2b;
            color: #ffffff;
        }
        </style>
        '''
    light_css = '''
        <style>
        .stApp {
            background-color: #ffffff;
            color: #000000;
        }
        </style>
        '''

    chosen_css = dark_css if st.session_state.dark_mode else light_css
    st.markdown(chosen_css, unsafe_allow_html=True)
64
 
65
- # Generate RSS feed
66
def generate_rss_feed():
    """Build an RSS 2.0 document with one item per entry of ``URLS``.

    Every key of ``URLS`` becomes an item whose title is the key and whose
    link is the mapped URL. Publication dates are staggered: the item at
    position ``i`` is dated ``i`` days before the current time.

    Returns:
        The value of ``feed.writeString('utf-8')`` for the populated feed.
    """
    # Local import keeps this block self-contained; only ``time`` is
    # imported at module level.
    import datetime

    feed = feedgenerator.Rss201rev2Feed(
        title="Infinite Dataset Hub Updates",
        link="https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub",
        description="Latest updates from the Infinite Dataset Hub",
        language="en"
    )

    now = datetime.datetime.now(datetime.timezone.utc)
    for age_in_days, (dataset_name, dataset_url) in enumerate(URLS.items()):
        feed.add_item(
            title=dataset_name,
            link=dataset_url,
            description=f"Link to {dataset_name}",
            # The feed writer formats pubdate as a datetime; the previous
            # time.gmtime(...) value was a struct_time, which it cannot
            # serialise.
            pubdate=now - datetime.timedelta(days=age_in_days),
        )

    return feed.writeString('utf-8')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
- # Download file
86
def download_file(url: str, local_filename: str) -> Optional[str]:
    """Stream the resource at ``url`` into ``local_filename``.

    Args:
        url: Address to fetch with ``requests.get``.
        local_filename: Path of the file to (over)write with the response
            body.

    Returns:
        ``local_filename`` when the download succeeded, ``None`` when the
        request failed or the file could not be written (the error is
        logged in both cases).
    """
    try:
        # A timeout keeps one unresponsive host from blocking the app
        # forever; 30 s matches the page request in download_html_and_files.
        with requests.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.exceptions.RequestException as err:
        # Must precede OSError: RequestException derives from IOError.
        logger.error(f"Error occurred while downloading {url}: {err}")
        return None
    except OSError as err:
        logger.error(f"Error occurred while writing {local_filename}: {err}")
        return None

    logger.info(f"File downloaded successfully: {local_filename}")
    return local_filename
98
-
99
- # Download HTML and files
100
  def download_html_and_files(url: str, subdir: str) -> None:
 
 
 
 
 
 
101
  try:
102
  os.makedirs(subdir, exist_ok=True)
 
103
  response = requests.get(url, timeout=30)
104
  response.raise_for_status()
105
  content = response.text
106
 
107
- soup = BeautifulSoup(content, 'html.parser')
108
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  base_url = urllib.parse.urlunparse(
110
  urllib.parse.urlparse(url)._replace(
111
  path='', params='', query='', fragment=''
112
  )
113
  )
114
 
115
- progress_bar = st.progress(0)
116
- total_links = len(soup.find_all('a'))
117
- for i, link in enumerate(soup.find_all('a')):
118
- href = link.get('href')
119
- if href:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  try:
121
  file_url = urllib.parse.urljoin(base_url, href)
122
  local_filename = os.path.join(
@@ -124,16 +160,26 @@ def download_html_and_files(url: str, subdir: str) -> None:
124
  urllib.parse.urlparse(file_url).path.split('/')[-1]
125
  )
126
 
 
 
 
127
  if local_filename != subdir:
128
  link['href'] = local_filename
129
  download_file(file_url, local_filename)
 
130
  except Exception as e:
131
  logger.error(f"Failed to process HTML link {href}: {e}")
132
- progress_bar.progress((i + 1) / total_links)
133
 
134
- with open(os.path.join(subdir, "index.html"), "w", encoding='utf-8') as file:
135
- file.write(str(soup))
136
- st.success("Content saved as index.html")
 
 
 
 
 
 
137
  except requests.exceptions.RequestException as e:
138
  logger.error(f"Failed to download content from {url}: {e}")
139
  st.error(f"Failed to download content from {url}")
@@ -141,99 +187,150 @@ def download_html_and_files(url: str, subdir: str) -> None:
141
  logger.error(f"Unexpected error while downloading content: {e}")
142
  st.error("An unexpected error occurred while downloading content")
143
 
144
- # Show download links
145
def show_download_links(subdir: str) -> None:
    """Render one HTML download link per regular file found in ``subdir``.

    Directory entries that are not regular files are ignored.
    """
    candidates = (os.path.join(subdir, entry) for entry in os.listdir(subdir))
    for candidate in candidates:
        if not os.path.isfile(candidate):
            continue
        st.markdown(get_download_link(candidate), unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
 
151
- # Get download link
152
def get_download_link(file: str) -> str:
    """Return an HTML anchor embedding ``file`` as a base64 ``data:`` URI.

    Args:
        file: Path of the file to expose for download.

    Returns:
        An ``<a>`` element whose ``href`` carries the whole file content and
        whose ``download`` attribute and label are the file's base name.

    Raises:
        OSError: If ``file`` cannot be opened or read.
    """
    # Local import keeps this block self-contained.
    import html

    with open(file, "rb") as f:
        bytes_content = f.read()
    b64 = base64.b64encode(bytes_content).decode()
    # Names of downloaded files come from remote pages; escape them so a
    # crafted name cannot break out of the attribute or inject markup.
    filename = html.escape(os.path.basename(file), quote=True)
    return f'<a href="data:file/octet-stream;base64,{b64}" download="{filename}">Download: {filename}</a>'
 
 
 
158
 
159
- # Show file browser
160
def show_file_browser():
    """Print the tree under ``downloads`` as indented lines.

    Shows a warning and stops when the ``downloads`` directory does not
    exist. Otherwise each directory is written as ``name/`` and each file
    beneath it one indentation step deeper.
    """
    st.write("File Browser")
    base = "downloads"
    if not os.path.exists(base):
        st.warning("No downloads available. Use the Content Downloader to download files.")
        return

    step = ' ' * 4
    for current_dir, _subdirs, filenames in os.walk(base):
        # Depth is the number of path separators below the base directory.
        depth = current_dir.replace(base, '').count(os.sep)
        st.write(f"{step * depth}{os.path.basename(current_dir)}/")
        child_prefix = step * (depth + 1)
        for name in filenames:
            st.write(f"{child_prefix}{name}")
174
-
175
- # Main function
176
def main():
    """Configure the page, draw the sidebar and render the chosen section."""
    st.set_page_config(page_title="RSS Feed and Content Downloader", layout="wide")

    toggle_dark_mode()

    # NOTE(review): the indentation of the original is not recoverable from
    # this view; the Dark Mode checkbox is assumed to live in the sidebar.
    with st.sidebar:
        st.title("Navigation")
        selected = option_menu(
            menu_title=None,
            options=["RSS Feed", "Content Downloader", "File Manager"],
            icons=["rss", "cloud-download", "folder"],
            menu_icon="cast",
            default_index=0,
        )
        st.checkbox("Dark Mode", key="dark_mode", on_change=toggle_dark_mode)

    # Map each menu label to the function that renders it.
    sections = {
        "RSS Feed": rss_feed_section,
        "Content Downloader": content_downloader_section,
        "File Manager": file_manager_section,
    }
    render_section = sections.get(selected)
    if render_section is not None:
        render_section()
203
-
204
- # RSS Feed Section
205
def rss_feed_section():
    """Render the RSS page: a button that builds, shows and offers the feed.

    Nothing beyond the header and the button is drawn until the button is
    pressed.
    """
    st.header("RSS Feed")
    if not st.button("Generate RSS Feed"):
        return

    with st.spinner("Generating RSS Feed..."):
        feed_xml = generate_rss_feed()
    st.success("RSS Feed generated successfully!")
    st.code(feed_xml, language="xml")

    # Let the user keep the generated XML as a file.
    st.download_button(
        label="Download RSS Feed",
        data=feed_xml,
        file_name="rss_feed.xml",
        mime="application/xml",
    )
220
 
221
- # Content Downloader Section
222
def content_downloader_section():
    """Render the downloader page: pick a source, a target folder, download.

    The source is chosen among the keys of ``URLS``; the target folder
    defaults to ``downloads``. Pressing the button downloads the page and
    then lists download links for the folder.
    """
    st.header("Content Downloader")
    selected_url = st.selectbox("Select a URL to download content from:", list(URLS.keys()))
    subdir = st.text_input("Enter subdirectory name to save files:", "downloads")

    if not st.button("Download Content"):
        return

    with st.spinner("Downloading content..."):
        download_html_and_files(URLS[selected_url], subdir)
    st.success("Content downloaded successfully!")
    show_download_links(subdir)
232
 
233
- # File Manager Section
234
def file_manager_section():
    """Render the File Manager page: a header followed by the file browser."""
    st.header("File Manager")
    show_file_browser()
237
-
238
  if __name__ == "__main__":
239
- main()
 
1
+ import streamlit as st
 
 
2
  import requests
3
+ import os
4
  import urllib
 
5
  import base64
 
 
6
  from bs4 import BeautifulSoup
7
+ import hashlib
8
+ import json
9
+ import uuid
10
+ import logging
11
+ from typing import Optional, Dict, List
12
+ from pathlib import Path
13
+ import feedparser
14
  import time
15
+ import subprocess
16
+
17
# Make sure feedparser is importable, installing it on first use if needed.
# NOTE(review): this fallback only helps when no unguarded
# ``import feedparser`` runs earlier in the module.
try:
    import feedparser
except ImportError:
    import sys

    # Use the running interpreter's pip so the package lands in the
    # environment this script imports from.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'feedparser'])
    # Installing alone does not bind the name; import it now.
    import feedparser
22
 
23
  # Set up logging
24
  logging.basicConfig(level=logging.INFO)
 
26
 
27
  # Constants
28
  EXCLUDED_FILES = [
29
+ 'app.py', 'requirements.txt', 'pre-requirements.txt', 'packages.txt', 'readme.md', '.gitattributes', "backup.py", "dockerfile"
 
 
30
  ]
31
 
32
  URLS = {
 
43
  "john lennon": "https://www.ultimate-guitar.com/search.php?search_type=title&value=john%20lennon",
44
  }
45
 
46
def initialize_history() -> None:
    """Create ``history.json`` holding an empty JSON object if it is missing.

    An existing file is left untouched.
    """
    history_path = "history.json"
    if os.path.exists(history_path):
        return
    with open(history_path, "w") as handle:
        handle.write(json.dumps({}))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
def download_file(url: str, local_filename: str) -> Optional[str]:
    """Download a file from a URL to a local file.

    Only ``http://`` and ``https://`` URLs are fetched; anything else is
    skipped without touching the network or the file system.

    Args:
        url (str): The URL to download from
        local_filename (str): The local file path to save to

    Returns:
        Optional[str]: The local filename if successful, None otherwise
    """
    if not url.startswith(('http://', 'https://')):
        return None

    try:
        # A timeout keeps one unresponsive host from blocking the app
        # forever; 30 s matches the page request in download_html_and_files.
        with requests.get(url, stream=True, timeout=30) as r:
            r.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.exceptions.RequestException as err:
        # Covers HTTP status errors as well as connection failures and
        # timeouts. Must precede OSError: RequestException derives from
        # IOError.
        logger.error(f"Error occurred while downloading {url}: {err}")
        return None
    except OSError as err:
        logger.error(f"Error occurred while writing {local_filename}: {err}")
        return None

    return local_filename
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def download_html_and_files(url: str, subdir: str) -> None:
76
+ """Download HTML/XML content and associated files from a URL.
77
+
78
+ Args:
79
+ url (str): The URL to download content from
80
+ subdir (str): The subdirectory to save files to
81
+ """
82
  try:
83
  os.makedirs(subdir, exist_ok=True)
84
+
85
  response = requests.get(url, timeout=30)
86
  response.raise_for_status()
87
  content = response.text
88
 
89
+ # Determine if content is XML or HTML
90
+ is_xml = url.endswith('.xml') or '<rss' in content[:1000] or '<?xml' in content[:1000]
91
+
92
+ try:
93
+ if is_xml:
94
+ soup = BeautifulSoup(content, 'xml') # Use XML parser for XML content
95
+ st.info("Processing XML content...")
96
+ else:
97
+ soup = BeautifulSoup(content, 'html.parser')
98
+ st.info("Processing HTML content...")
99
+
100
+ except Exception as e:
101
+ # Try alternative parser if first attempt fails
102
+ try:
103
+ soup = BeautifulSoup(content, 'lxml')
104
+ st.info("Using alternative parser (lxml)...")
105
+ except Exception as inner_e:
106
+ logger.error(f"Failed to parse content: {e}, {inner_e}")
107
+ st.error(f"Failed to parse content from {url}")
108
+ return
109
+
110
  base_url = urllib.parse.urlunparse(
111
  urllib.parse.urlparse(url)._replace(
112
  path='', params='', query='', fragment=''
113
  )
114
  )
115
 
116
+ # Handle links differently for XML and HTML
117
+ if is_xml:
118
+ # For XML, look for specific tags that might contain links
119
+ link_tags = (
120
+ soup.find_all('link') +
121
+ soup.find_all('url') +
122
+ soup.find_all('enclosure') +
123
+ soup.find_all('media:content')
124
+ )
125
+
126
+ for link in link_tags:
127
+ try:
128
+ # Get URL from appropriate attribute
129
+ href = (
130
+ link.get('href') or
131
+ link.get('url') or
132
+ link.get('src') or
133
+ link.text.strip()
134
+ )
135
+
136
+ if href and (href.startswith('http://') or href.startswith('https://')):
137
+ file_url = href
138
+ local_filename = os.path.join(
139
+ subdir,
140
+ urllib.parse.urlparse(file_url).path.split('/')[-1]
141
+ )
142
+
143
+ if local_filename and not local_filename.endswith('/'):
144
+ download_file(file_url, local_filename)
145
+
146
+ except Exception as e:
147
+ logger.error(f"Failed to process XML link: {e}")
148
+ continue
149
+ else:
150
+ # Original HTML processing
151
+ for link in soup.find_all('a'):
152
+ href = link.get('href')
153
+ if not href:
154
+ continue
155
+
156
  try:
157
  file_url = urllib.parse.urljoin(base_url, href)
158
  local_filename = os.path.join(
 
160
  urllib.parse.urlparse(file_url).path.split('/')[-1]
161
  )
162
 
163
+ if not local_filename or local_filename.endswith('/'):
164
+ continue
165
+
166
  if local_filename != subdir:
167
  link['href'] = local_filename
168
  download_file(file_url, local_filename)
169
+
170
  except Exception as e:
171
  logger.error(f"Failed to process HTML link {href}: {e}")
172
+ continue
173
 
174
+ # Save the processed content
175
+ try:
176
+ output_filename = "feed.xml" if is_xml else "index.html"
177
+ with open(os.path.join(subdir, output_filename), "w", encoding='utf-8') as file:
178
+ file.write(str(soup))
179
+ st.success(f"Content saved as {output_filename}")
180
+ except Exception as e:
181
+ logger.error(f"Failed to save content file: {e}")
182
+ st.error("Failed to save downloaded content")
183
  except requests.exceptions.RequestException as e:
184
  logger.error(f"Failed to download content from {url}: {e}")
185
  st.error(f"Failed to download content from {url}")
 
187
  logger.error(f"Unexpected error while downloading content: {e}")
188
  st.error("An unexpected error occurred while downloading content")
189
 
190
def list_files(directory_path: str = '.') -> List[str]:
    """Return the names of regular files in a directory, minus exclusions.

    Entries whose name appears in ``EXCLUDED_FILES`` and entries that are
    not regular files are left out. Names are returned in the order
    ``os.listdir`` yields them.
    """
    visible = []
    for entry in os.listdir(directory_path):
        if not os.path.isfile(os.path.join(directory_path, entry)):
            continue
        if entry in EXCLUDED_FILES:
            continue
        visible.append(entry)
    return visible
194
+
195
def file_editor(file_path: str) -> None:
    """Show a text area with the file's content and a button to save it.

    A read failure is logged, reported to the user and ends the function
    before any editor is drawn. A write failure is logged and reported in
    the same way.
    """
    display_name = os.path.basename(file_path)
    st.write(f"Editing File: {display_name}")

    try:
        with open(file_path, "r", encoding='utf-8') as source:
            original_text = source.read()
    except Exception as e:
        logger.error(f"Failed to read file {file_path}: {e}")
        st.error("Failed to read file")
        return

    edited_content = st.text_area(
        "Edit the file content:",
        value=original_text,
        height=250
    )

    if not st.button("💾 Save"):
        return

    try:
        with open(file_path, "w", encoding='utf-8') as target:
            target.write(edited_content)
        st.success(f"File '{os.path.basename(file_path)}' saved!")
    except Exception as e:
        logger.error(f"Failed to save file {file_path}: {e}")
        st.error("Failed to save file")
221
+
222
def show_file_operations(file_path: str, sequence_number: int) -> None:
    """Show file operations UI for a given file.

    Draws a five-column row: the file name, an unused spacer column, and
    Edit / Save / Delete buttons.

    A Streamlit button reports ``True`` only during the run triggered by its
    own click. "Edit mode" is therefore remembered in ``st.session_state``,
    so the editor is still present on the later run in which Save is
    clicked. Previously the editor existed for a single run, and Save always
    saw an empty buffer and never wrote anything.

    Args:
        file_path: Path of the file the row operates on.
        sequence_number: Suffix added to every widget key so the same file
            can be rendered more than once without key collisions.
    """
    unique_key = hashlib.md5(file_path.encode()).hexdigest()
    # One consistent suffix for all widget keys of this row.
    key_suffix = f"{unique_key}_{sequence_number}"
    editing_flag = f"editing_{key_suffix}"
    # None means "no editor was drawn in this run".
    edited_content = None

    name_col, _spacer_col, edit_col, save_col, delete_col = st.columns(5)

    with name_col:
        st.write(os.path.basename(file_path))

    with edit_col:
        if st.button("✏️ Edit", key=f"edit_{key_suffix}"):
            st.session_state[editing_flag] = True

        if st.session_state.get(editing_flag):
            try:
                with open(file_path, "r", encoding='utf-8') as f:
                    current_content = f.read()
            except Exception as e:
                logger.error(f"Failed to read file {file_path}: {e}")
                st.error("Failed to read file")
            else:
                edited_content = st.text_area(
                    "Edit the file content:",
                    value=current_content,
                    height=250,
                    key=f"text_area_{key_suffix}"
                )

    with save_col:
        if st.button("💾 Save", key=f"save_{key_suffix}"):
            if edited_content is None:
                st.warning("Click Edit before saving.")
            else:
                try:
                    with open(file_path, "w", encoding='utf-8') as f:
                        f.write(edited_content)
                    st.success("File saved!")
                except Exception as e:
                    logger.error(f"Failed to save file {file_path}: {e}")
                    st.error("Failed to save file")

    with delete_col:
        if st.button("🗑️ Delete", key=f"delete_{key_suffix}"):
            try:
                os.remove(file_path)
                # The file is gone; do not try to reopen it in the editor.
                st.session_state[editing_flag] = False
                st.success("File deleted!")
            except Exception as e:
                logger.error(f"Failed to delete file {file_path}: {e}")
                st.error("Failed to delete file")
270
 
 
271
def get_download_link(file: str) -> str:
    """Generate a download link for a file.

    Args:
        file: Path of the file to expose for download.

    Returns:
        An ``<a>`` element whose ``href`` is a base64 ``data:`` URI holding
        the file content, or a plain failure message when the link could not
        be built (the error is logged).
    """
    # Local import keeps this block self-contained.
    import html

    # Names of downloaded files come from remote pages and the result is
    # rendered as raw HTML; escape the name so it cannot inject markup.
    filename = html.escape(os.path.basename(file), quote=True)
    try:
        with open(file, "rb") as f:
            bytes_content = f.read()
        b64 = base64.b64encode(bytes_content).decode()
        return f'<a href="data:file/octet-stream;base64,{b64}" download="{filename}">Download: {filename}</a>'
    except Exception as e:
        logger.error(f"Failed to create download link for {file}: {e}")
        return f"Failed to create download link for {filename}"
282
 
283
def show_download_links(subdir: str) -> None:
    """Show download links for all files in a directory.

    For every file returned by ``list_files(subdir)`` this renders a
    download link followed by the Edit / Save / Delete row.

    A per-path counter stored on the function object supplies the sequence
    number passed to ``show_file_operations``. Listing the same path again
    yields a new number and therefore distinct widget keys.

    Args:
        subdir: Directory whose files are listed.
    """
    # The counters live on the function object, so the former
    # ``global file_sequence_numbers`` declaration was dead and is removed.
    if not hasattr(show_download_links, 'file_sequence_numbers'):
        show_download_links.file_sequence_numbers = {}
    counters = show_download_links.file_sequence_numbers

    for file in list_files(subdir):
        file_path = os.path.join(subdir, file)

        counters[file_path] = counters.get(file_path, 0) + 1
        sequence_number = counters[file_path]

        if os.path.isfile(file_path):
            st.markdown(get_download_link(file_path), unsafe_allow_html=True)
            show_file_operations(file_path, sequence_number)
        else:
            # Reached only if the file vanished after it was listed.
            st.write(f"File not found: {file}")
305
+
306
+ # Generate RSS feed
307
def generate_rss_feed():
    """Fetch and parse the hub's RSS endpoint, returning feedparser's result.

    Despite its name, this function does not build a feed; it hands the
    fixed URL to ``feedparser.parse`` and returns whatever that yields.
    """
    return feedparser.parse("https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub/rss")
310
+
311
def main() -> None:
    """Main app.

    Renders two sections on one page:

    * RSS Feed: a button that parses the remote feed and dumps the result.
    * Content Downloader: choose one of ``URLS`` and a target folder,
      download the page, then list the folder's files with their download
      links and file operations.
    """
    st.title("RSS Feed and Content Downloader")

    # Initialize history
    initialize_history()

    # RSS Feed Section
    st.header("RSS Feed")
    if st.button("Generate RSS Feed"):
        rss_feed = generate_rss_feed()
        st.success("RSS Feed generated successfully!")
        st.write(rss_feed)

    # Content Downloader Section
    st.header("Content Downloader")
    selected_url = st.selectbox("Select a URL to download content from:", list(URLS.keys()))
    subdir = st.text_input("Enter subdirectory name to save files:", "downloads")

    if st.button("Download Content"):
        download_html_and_files(URLS[selected_url], subdir)
        st.success("Content downloaded successfully!")

    # List the files on every run, not only the one triggered by the
    # download button. A click on a per-file button starts a new run in
    # which "Download Content" is False again; if the list were drawn only
    # inside that branch, the clicked button would no longer exist and its
    # action could never execute.
    if subdir and os.path.isdir(subdir):
        show_download_links(subdir)
334
 
 
 
 
 
 
335
  if __name__ == "__main__":
336
+ main()