acecalisto3 committed on
Commit
c4c5e82
·
verified ·
1 Parent(s): ef30ef7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -274
app.py CHANGED
@@ -1,30 +1,30 @@
1
- import streamlit as st
2
- import requests
3
  import os
 
 
4
  import urllib
5
- import base64
6
- from bs4 import BeautifulSoup
7
  import hashlib
8
- import json
9
- import uuid
10
  import logging
11
- from typing import Optional, Dict, List, Any
12
- from pathlib import Path
13
- from shot_scraper import shotscraper # importing shot-scraper
14
- import feedgenerator # Import RSS feed generator
 
 
15
 
16
- # set up logging
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
- # constants
21
- excluded_files = [
22
  'app.py', 'requirements.txt', 'pre-requirements.txt',
23
  'packages.txt', 'readme.md', '.gitattributes',
24
  "backup.py", "dockerfile"
25
  ]
26
 
27
- urls = {
28
  "chordify - play along chords": "https://chordify.net/",
29
  "national guitar academy - guitar learning": "https://www.guitaracademy.com/",
30
  "ultimate guitar - massive song database": "https://www.ultimate-guitar.com/",
@@ -38,146 +38,102 @@ urls = {
38
  "john lennon": "https://www.ultimate-guitar.com/search.php?search_type=title&value=john%20lennon",
39
  }
40
 
41
- def initialize_history() -> None:
42
- """Initialize history.json if it doesn't exist."""
43
- if not os.path.exists("history.json"):
44
- with open("history.json", "w") as f:
45
- json.dump({}, f)
46
-
47
- def download_file(url: str, local_filename: str) -> Optional[str]:
48
- """
49
- Download a file from a URL to a local file.
50
 
51
- Args:
52
- url (str): The URL to download from
53
- local_filename (str): The local file path to save to
54
-
55
- Returns:
56
- Optional[str]: The local filename if successful, None otherwise
57
- """
58
- if url.startswith(('http://', 'https://')):
59
- try:
60
- with requests.get(url, stream=True) as r:
61
- r.raise_for_status()
62
- with open(local_filename, 'wb') as f:
63
- for chunk in r.iter_content(chunk_size=8192):
64
- f.write(chunk)
65
- return local_filename
66
- except requests.exceptions.HTTPError as err:
67
- logger.error(f"HTTP error occurred: {err}")
68
- return None
69
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def download_html_and_files(url: str, subdir: str) -> None:
72
- """
73
- Download HTML/XML content and associated files from a URL.
74
-
75
- Args:
76
- url (str): The URL to download content from
77
- subdir (str): The subdirectory to save files to
78
- """
79
  try:
80
  os.makedirs(subdir, exist_ok=True)
81
-
82
  response = requests.get(url, timeout=30)
83
  response.raise_for_status()
84
  content = response.text
 
 
85
 
86
- # Determine if content is XML or HTML
87
- is_xml = url.endswith('.xml') or '<rss' in content[:1000] or '<?xml' in content[:1000]
88
-
89
- try:
90
- if is_xml:
91
- soup = BeautifulSoup(content, 'xml') # Use XML parser for XML content
92
- st.info("Processing XML content...")
93
- else:
94
- soup = BeautifulSoup(content, 'html.parser')
95
- st.info("Processing HTML content...")
96
-
97
- except Exception as e:
98
- # Try alternative parser if first attempt fails
99
- try:
100
- soup = BeautifulSoup(content, 'lxml')
101
- st.info("Using alternative parser (lxml)...")
102
- except Exception as inner_e:
103
- logger.error(f"Failed to parse content: {e}, {inner_e}")
104
- st.error(f"Failed to parse content from {url}")
105
- return
106
-
107
  base_url = urllib.parse.urlunparse(
108
  urllib.parse.urlparse(url)._replace(
109
  path='', params='', query='', fragment=''
110
  )
111
  )
112
-
113
- # Handle links differently for XML and HTML
114
- if is_xml:
115
- # For XML, look for specific tags that might contain links
116
- link_tags = (
117
- soup.find_all('link') +
118
- soup.find_all('url') +
119
- soup.find_all('enclosure') +
120
- soup.find_all('media:content')
121
- )
122
-
123
- for link in link_tags:
124
- try:
125
- # Get URL from appropriate attribute
126
- href = (
127
- link.get('href') or
128
- link.get('url') or
129
- link.get('src') or
130
- link.text.strip()
131
- )
132
-
133
- if href and (href.startswith('http://') or href.startswith('https://')):
134
- file_url = href
135
- local_filename = os.path.join(
136
- subdir,
137
- urllib.parse.urlparse(file_url).path.split('/')[-1]
138
- )
139
-
140
- if local_filename and not local_filename.endswith('/'):
141
- download_file(file_url, local_filename)
142
-
143
- except Exception as e:
144
- logger.error(f"Failed to process XML link: {e}")
145
- continue
146
- else:
147
- # Original HTML processing
148
- for link in soup.find_all('a'):
149
- href = link.get('href')
150
- if not href:
151
- continue
152
-
153
  try:
154
  file_url = urllib.parse.urljoin(base_url, href)
155
  local_filename = os.path.join(
156
- subdir,
157
  urllib.parse.urlparse(file_url).path.split('/')[-1]
158
  )
159
-
160
- if not local_filename or local_filename.endswith('/'):
161
- continue
162
-
163
  if local_filename != subdir:
164
  link['href'] = local_filename
165
  download_file(file_url, local_filename)
166
-
167
  except Exception as e:
168
  logger.error(f"Failed to process HTML link {href}: {e}")
169
- continue
170
-
171
- # Save the processed content
172
- try:
173
- output_filename = "feed.xml" if is_xml else "index.html"
174
- with open(os.path.join(subdir, output_filename), "w", encoding='utf-8') as file:
175
- file.write(str(soup))
176
- st.success(f"Content saved as {output_filename}")
177
- except Exception as e:
178
- logger.error(f"Failed to save content file: {e}")
179
- st.error("Failed to save downloaded content")
180
-
181
  except requests.exceptions.RequestException as e:
182
  logger.error(f"Failed to download content from {url}: {e}")
183
  st.error(f"Failed to download content from {url}")
@@ -185,166 +141,99 @@ def download_html_and_files(url: str, subdir: str) -> None:
185
  logger.error(f"Unexpected error while downloading content: {e}")
186
  st.error("An unexpected error occurred while downloading content")
187
 
188
- def list_files(directory_path: str = '.') -> List[str]:
189
- """List all files in directory excluding EXCLUDED_FILES."""
190
- files = [f for f in os.listdir(directory_path)
191
- if os.path.isfile(os.path.join(directory_path, f))]
192
- return [f for f in files if f not in EXCLUDED_FILES]
193
-
194
- def file_editor(file_path: str) -> None:
195
- """Edit file content using Streamlit text area."""
196
- st.write(f"Editing File: {os.path.basename(file_path)}")
197
-
198
- try:
199
- with open(file_path, "r", encoding='utf-8') as f:
200
- file_content = f.read()
201
- except Exception as e:
202
- logger.error(f"Failed to read file {file_path}: {e}")
203
- st.error("Failed to read file")
204
- return
205
-
206
- edited_content = st.text_area(
207
- "Edit the file content:",
208
- value=file_content,
209
- height=250
210
- )
211
-
212
- if st.button("💾 Save"):
213
- try:
214
- with open(file_path, "w", encoding='utf-8') as f:
215
- f.write(edited_content)
216
- st.success(f"File '{os.path.basename(file_path)}' saved!")
217
- except Exception as e:
218
- logger.error(f"Failed to save file {file_path}: {e}")
219
- st.error("Failed to save file")
220
-
221
- def show_file_operations(file_path: str, sequence_number: int) -> None:
222
- """Show file operations UI for a given file."""
223
- unique_key = hashlib.md5(file_path.encode()).hexdigest()
224
- file_content = ""
225
-
226
- col01, col02, col1, col2, col3 = st.columns(5)
227
-
228
- with col01:
229
- st.write(os.path.basename(file_path))
230
-
231
- with col1:
232
- edit_key = f"edit_{unique_key}_{sequence_number}"
233
- if st.button("✏️ Edit", key=edit_key):
234
- try:
235
- with open(file_path, "r", encoding='utf-8') as f:
236
- file_content = f.read()
237
- text_area_key = f"text_area_{unique_key}_{sequence_number}"
238
- file_content = st.text_area(
239
- "Edit the file content:",
240
- value=file_content,
241
- height=250,
242
- key=text_area_key
243
- )
244
- except Exception as e:
245
- logger.error(f"Failed to read file {file_path}: {e}")
246
- st.error("Failed to read file")
247
-
248
- with col2:
249
- save_key = f"save_{unique_key}_{sequence_number}"
250
- if st.button("💾 Save", key=save_key):
251
- if file_content:
252
- try:
253
- with open(file_path, "w", encoding='utf-8') as f:
254
- f.write(file_content)
255
- st.success("File saved!")
256
- except Exception as e:
257
- logger.error(f"Failed to save file {file_path}: {e}")
258
- st.error("Failed to save file")
259
-
260
- with col3:
261
- delete_key = f"delete_{unique_key}_{sequence_number}"
262
- if st.button("🗑️ Delete", key=delete_key):
263
- try:
264
- os.remove(file_path)
265
- st.success("File deleted!")
266
- except Exception as e:
267
- logger.error(f"Failed to delete file {file_path}: {e}")
268
- st.error("Failed to delete file")
269
-
270
- def get_download_link(file: str) -> str:
271
- """Generate a download link for a file."""
272
- try:
273
- with open(file, "rb") as f:
274
- bytes_content = f.read()
275
- b64 = base64.b64encode(bytes_content).decode()
276
- filename = os.path.basename(file)
277
- return f'<a href="data:file/octet-stream;base64,{b64}" download=\'{filename}\'>Download: {filename}</a>'
278
- except Exception as e:
279
- logger.error(f"Failed to create download link for {file}: {e}")
280
- return f"Failed to create download link for {os.path.basename(file)}"
281
-
282
  def show_download_links(subdir: str) -> None:
283
- """Show download links for all files in a directory."""
284
- global file_sequence_numbers
285
-
286
- if not hasattr(show_download_links, 'file_sequence_numbers'):
287
- show_download_links.file_sequence_numbers = {}
288
-
289
- for file in list_files(subdir):
290
  file_path = os.path.join(subdir, file)
291
-
292
- if file_path not in show_download_links.file_sequence_numbers:
293
- show_download_links.file_sequence_numbers[file_path] = 1
294
- else:
295
- show_download_links.file_sequence_numbers[file_path] += 1
296
-
297
- sequence_number = show_download_links.file_sequence_numbers[file_path]
298
-
299
  if os.path.isfile(file_path):
300
  st.markdown(get_download_link(file_path), unsafe_allow_html=True)
301
- show_file_operations(file_path, sequence_number)
302
- else:
303
- st.write(f"File not found: {file}")
304
 
305
- # Generate RSS feed
306
- def generate_rss_feed():
307
- feed = feedgenerator.Rss201rev2Feed(
308
- title="Infinite Dataset Hub Updates",
309
- link="https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub",
310
- description="Latest updates from the Infinite Dataset Hub",
311
- language="en"
312
- )
 
 
 
 
 
 
 
313
 
314
- for i, line in enumerate(urls):
315
- dataset_name = line
316
- feed.add_item(
317
- title=dataset_name,
318
- link=urls[dataset_name],
319
- description=f"Link to {dataset_name}",
320
- pubdate=time.gmtime(time.time() - 86400 * i)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  )
322
-
323
- return feed.writeString('utf-8')
324
-
325
- def main() -> None:
326
- """app.py"""
327
- st.title("RSS Feed and Content Downloader")
328
-
329
- # Initialize history
330
- initialize_history()
331
-
332
- # RSS Feed Section
 
 
 
333
  st.header("RSS Feed")
334
  if st.button("Generate RSS Feed"):
335
- rss_feed = generate_rss_feed()
 
336
  st.success("RSS Feed generated successfully!")
337
- st.markdown(rss_feed, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
338
 
339
- # Content Downloader Section
 
340
  st.header("Content Downloader")
341
- selected_url = st.selectbox("Select a URL to download content from:", list(urls.keys()))
342
  subdir = st.text_input("Enter subdirectory name to save files:", "downloads")
343
 
344
  if st.button("Download Content"):
345
- download_html_and_files(urls[selected_url], subdir)
 
346
  st.success("Content downloaded successfully!")
347
  show_download_links(subdir)
348
 
 
 
 
 
 
349
  if __name__ == "__main__":
350
- main()
 
1
+
 
2
  import os
3
+ import json
4
+ import requests
5
  import urllib
 
 
6
  import hashlib
7
+ import base64
 
8
  import logging
9
+ import streamlit as st
10
+ from bs4 import BeautifulSoup
11
+ from typing import Optional, List
12
+ import feedgenerator
13
+ import time
14
+ from streamlit_option_menu import option_menu
15
 
16
+ # Set up logging
17
  logging.basicConfig(level=logging.INFO)
18
  logger = logging.getLogger(__name__)
19
 
20
+ # Constants
21
+ EXCLUDED_FILES = [
22
  'app.py', 'requirements.txt', 'pre-requirements.txt',
23
  'packages.txt', 'readme.md', '.gitattributes',
24
  "backup.py", "dockerfile"
25
  ]
26
 
27
+ URLS = {
28
  "chordify - play along chords": "https://chordify.net/",
29
  "national guitar academy - guitar learning": "https://www.guitaracademy.com/",
30
  "ultimate guitar - massive song database": "https://www.ultimate-guitar.com/",
 
38
  "john lennon": "https://www.ultimate-guitar.com/search.php?search_type=title&value=john%20lennon",
39
  }
40
 
41
def toggle_dark_mode():
    """Inject the page CSS that matches the ``dark_mode`` session flag.

    The flag is created as ``False`` the first time the function runs, so
    the light palette is the default.
    """
    if 'dark_mode' not in st.session_state:
        st.session_state.dark_mode = False

    dark_css = '''
    <style>
    .stApp {
        background-color: #2b2b2b;
        color: #ffffff;
    }
    </style>
    '''
    light_css = '''
    <style>
    .stApp {
        background-color: #ffffff;
        color: #000000;
    }
    </style>
    '''

    chosen_css = dark_css if st.session_state.dark_mode else light_css
    st.markdown(chosen_css, unsafe_allow_html=True)
64
+
65
def generate_rss_feed():
    """Build an RSS 2.0 document with one item per entry in ``URLS``.

    Items are dated one day apart, newest first, following the iteration
    order of ``URLS``.

    Returns:
        The serialized feed as returned by ``feed.writeString('utf-8')``.
    """
    # Imported locally because the module's import block does not provide it.
    from datetime import datetime, timedelta, timezone

    feed = feedgenerator.Rss201rev2Feed(
        title="Infinite Dataset Hub Updates",
        link="https://huggingface.co/spaces/infinite-dataset-hub/infinite-dataset-hub",
        description="Latest updates from the Infinite Dataset Hub",
        language="en"
    )

    # feedgenerator formats pubdate as a datetime; a time.struct_time (what
    # time.gmtime returns) cannot be serialized when the feed is written.
    now = datetime.now(timezone.utc)
    for age_in_days, (dataset_name, dataset_link) in enumerate(URLS.items()):
        feed.add_item(
            title=dataset_name,
            link=dataset_link,
            description=f"Link to {dataset_name}",
            pubdate=now - timedelta(days=age_in_days)
        )

    return feed.writeString('utf-8')
84
 
85
def download_file(url: str, local_filename: str, timeout: float = 30) -> Optional[str]:
    """Stream ``url`` to ``local_filename`` in 8 KiB chunks.

    Args:
        url: Address to fetch.
        local_filename: Path of the file to (over)write.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``local_filename`` on success, ``None`` if the request or the local
        write failed (the failure is logged).
    """
    try:
        # Without a timeout an unresponsive server would block the app forever.
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()
            with open(local_filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
    except (requests.exceptions.RequestException, OSError) as err:
        # OSError covers local failures such as the target being a directory,
        # so callers really do get None instead of an exception.
        logger.error(f"Error occurred while downloading {url}: {err}")
        return None

    logger.info(f"File downloaded successfully: {local_filename}")
    return local_filename
98
+
99
def download_html_and_files(url: str, subdir: str) -> None:
    """Save the page at ``url`` and the files it links to inside ``subdir``.

    Every ``<a href>`` that resolves to an http(s) URL with a file name is
    downloaded next to the page. Links whose download succeeded are rewritten
    to the local copy, and the resulting document is written to
    ``<subdir>/index.html``. Progress and the outcome are reported through
    Streamlit; failures are logged and shown to the user, never raised.

    Args:
        url: Page to fetch.
        subdir: Directory that receives ``index.html`` and the linked files;
            created if missing.
    """
    # The module only does ``import urllib``; import the submodule explicitly
    # rather than relying on another package having loaded it.
    import urllib.parse

    try:
        os.makedirs(subdir, exist_ok=True)
        response = requests.get(url, timeout=30)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        progress_bar = st.progress(0)
        links = soup.find_all('a')
        total_links = len(links)
        for i, link in enumerate(links):
            href = link.get('href')
            if href:
                try:
                    # Relative links are relative to the page itself (after
                    # redirects), not to the site root.
                    file_url = urllib.parse.urljoin(response.url, href)
                    parsed = urllib.parse.urlparse(file_url)
                    name = parsed.path.split('/')[-1]

                    # Skip mailto:/javascript: links and URLs that end in a
                    # directory; there is no file to write for them.
                    downloadable = (
                        parsed.scheme in ('http', 'https')
                        and name not in ('', '.', '..')
                    )
                    if downloadable:
                        local_filename = os.path.join(subdir, name)
                        if download_file(file_url, local_filename):
                            # index.html lives in subdir too, so the link to
                            # the local copy is just the file name.
                            link['href'] = name
                except Exception as e:
                    logger.error(f"Failed to process HTML link {href}: {e}")
            progress_bar.progress((i + 1) / total_links)

        with open(os.path.join(subdir, "index.html"), "w", encoding='utf-8') as file:
            file.write(str(soup))
        st.success("Content saved as index.html")
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to download content from {url}: {e}")
        st.error(f"Failed to download content from {url}")
    except Exception as e:
        logger.error(f"Unexpected error while downloading content: {e}")
        st.error("An unexpected error occurred while downloading content")
143
 
144
def show_download_links(subdir: str) -> None:
    """Render one download link per regular file found directly in ``subdir``.

    Files named in ``EXCLUDED_FILES`` are skipped so that pointing the app at
    its own directory does not offer its source and configuration files.
    Sub-directories are ignored. A missing or unreadable directory yields a
    warning instead of an exception.

    Args:
        subdir: Directory whose files should be offered for download.
    """
    try:
        names = sorted(os.listdir(subdir))
    except OSError as err:
        logger.error(f"Failed to list files in {subdir}: {err}")
        st.warning(f"No files available in {subdir}")
        return

    for file in names:
        if file in EXCLUDED_FILES:
            continue
        file_path = os.path.join(subdir, file)
        if os.path.isfile(file_path):
            st.markdown(get_download_link(file_path), unsafe_allow_html=True)
 
 
 
150
 
151
def get_download_link(file: str) -> str:
    """Return an HTML anchor that downloads ``file`` from an embedded data URI.

    The file is read fully into memory and base64-encoded into the link.

    Args:
        file: Path of the file to embed.

    Returns:
        An ``<a>`` element whose ``download`` attribute and label are the
        HTML-escaped base name of ``file``.

    Raises:
        OSError: If ``file`` cannot be opened or read.
    """
    # Imported locally because the module's import block does not provide it.
    import html

    with open(file, "rb") as f:
        bytes_content = f.read()
    b64 = base64.b64encode(bytes_content).decode()

    # File names originate from remote URLs and the result is rendered as raw
    # HTML, so the name must not be able to break out of the attribute or tag.
    filename = html.escape(os.path.basename(file), quote=True)
    return f'<a href="data:file/octet-stream;base64,{b64}" download="{filename}">Download: {filename}</a>'
158
+
159
def show_file_browser():
    """Display the contents of the ``downloads`` directory as an indented tree.

    Shows a warning and returns early when the directory does not exist.
    Directories are listed with a trailing ``/``; each nesting level is
    indented by four spaces.
    """
    st.write("File Browser")
    root_dir = "downloads"
    if not os.path.exists(root_dir):
        st.warning("No downloads available. Use the Content Downloader to download files.")
        return

    tree_lines = []
    for root, dirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        level = root[len(root_dir):].count(os.sep)
        indent = ' ' * 4 * level
        tree_lines.append(f"{indent}{os.path.basename(root)}/")
        sub_indent = ' ' * 4 * (level + 1)
        tree_lines.extend(f"{sub_indent}{file}" for file in sorted(files))

    # Markdown does not render leading spaces as indentation, so the tree is
    # emitted as one preformatted block instead of one st.write per line.
    st.text("\n".join(tree_lines))
174
+
175
def main():
    """Configure the page, draw the sidebar and render the selected section."""
    st.set_page_config(page_title="RSS Feed and Content Downloader", layout="wide")

    # Emit the theme CSS for the current session state.
    toggle_dark_mode()

    with st.sidebar:
        st.title("Navigation")
        selected = option_menu(
            menu_title=None,
            options=["RSS Feed", "Content Downloader", "File Manager"],
            icons=["rss", "cloud-download", "folder"],
            menu_icon="cast",
            default_index=0,
        )
        st.checkbox("Dark Mode", key="dark_mode", on_change=toggle_dark_mode)

    # Map each menu entry to the function that renders it.
    sections = {
        "RSS Feed": rss_feed_section,
        "Content Downloader": content_downloader_section,
        "File Manager": file_manager_section,
    }
    render_section = sections.get(selected)
    if render_section is not None:
        render_section()
203
+
204
def rss_feed_section():
    """Render the RSS page: a button that generates, shows and offers the feed."""
    st.header("RSS Feed")
    if not st.button("Generate RSS Feed"):
        return

    with st.spinner("Generating RSS Feed..."):
        rss_feed = generate_rss_feed()
    st.success("RSS Feed generated successfully!")
    st.code(rss_feed, language="xml")

    # Offer the same document as a downloadable XML file.
    export_options = {
        "label": "Download RSS Feed",
        "data": rss_feed,
        "file_name": "rss_feed.xml",
        "mime": "application/xml",
    }
    st.download_button(**export_options)
220
 
221
def content_downloader_section():
    """Render the downloader page: pick a URL and target directory, then fetch."""
    st.header("Content Downloader")
    url_labels = list(URLS.keys())
    selected_url = st.selectbox("Select a URL to download content from:", url_labels)
    subdir = st.text_input("Enter subdirectory name to save files:", "downloads")

    if not st.button("Download Content"):
        return

    target_url = URLS[selected_url]
    with st.spinner("Downloading content..."):
        download_html_and_files(target_url, subdir)
    st.success("Content downloaded successfully!")
    show_download_links(subdir)
232
 
233
def file_manager_section():
    """Render the file manager page: a header followed by the file browser."""
    st.header("File Manager")
    show_file_browser()
237
+
238
  if __name__ == "__main__":
239
+ main()