acecalisto3 committed on
Commit
9024249
·
verified ·
1 Parent(s): ecc6446

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -70
app.py CHANGED
@@ -10,6 +10,7 @@ import uuid
10
  import logging
11
  from typing import Optional, Dict, List, Any
12
  from pathlib import Path
 
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.INFO)
@@ -302,79 +303,19 @@ def show_download_links(subdir: str) -> None:
302
 
303
  def main() -> None:
304
  """Main application function."""
305
- st.sidebar.title('Web Datasets Bulk Downloader')
306
 
307
- # Initialize history file
308
- initialize_history()
309
 
310
- # Check for query parameters
311
- query_params = st.experimental_get_query_params()
312
- file_to_edit = query_params.get('file_to_edit', [None])[0]
 
 
313
 
314
- if file_to_edit and os.path.exists(file_to_edit):
315
- file_editor(file_to_edit)
316
- else:
317
- # URL input method selection
318
- url_input_method = st.sidebar.radio(
319
- "Choose URL Input Method",
320
- ["Enter URL", "Select from List"]
321
- )
322
-
323
- url = ""
324
- if url_input_method == "Enter URL":
325
- url = st.sidebar.text_input(
326
- 'Please enter a Web URL to bulk download text and files'
327
- )
328
- else:
329
- selected_site = st.sidebar.selectbox(
330
- "Select a Website",
331
- list(URLS.keys())
332
- )
333
- url = URLS[selected_site]
334
-
335
- # Load history
336
- try:
337
- with open("history.json", "r") as f:
338
- history = json.load(f)
339
- except Exception as e:
340
- logger.error(f"Failed to load history: {e}")
341
- history = {}
342
-
343
- # Handle URL submission
344
- if url:
345
- subdir = hashlib.md5(url.encode()).hexdigest()
346
- os.makedirs(subdir, exist_ok=True)
347
-
348
- if url not in history:
349
- history[url] = subdir
350
- try:
351
- with open("history.json", "w") as f:
352
- json.dump(history, f)
353
- except Exception as e:
354
- logger.error(f"Failed to save history: {e}")
355
-
356
- # Download content button
357
- if st.sidebar.button('📥 Get All the Content'):
358
- if url:
359
- download_html_and_files(url, history[url])
360
- show_download_links(history[url])
361
- else:
362
- st.warning("Please enter or select a URL first")
363
-
364
- # Show download links button
365
- if st.sidebar.button('📂 Show Download Links'):
366
- for subdir in history.values():
367
- show_download_links(subdir)
368
-
369
- # URL history expander
370
- with st.expander("URL History and Downloaded Files"):
371
- for url, subdir in history.items():
372
- st.markdown(f"#### {url}")
373
- show_download_links(subdir)
374
-
375
- # Update current files
376
- for subdir in history.values():
377
- show_download_links(subdir)
378
 
379
  if __name__ == "__main__":
380
  main()
 
10
  import logging
11
  from typing import Optional, Dict, List, Any
12
  from pathlib import Path
13
+ from shot_scraper import ShotScraper # Importing shot-scraper
14
 
15
  # Set up logging
16
  logging.basicConfig(level=logging.INFO)
 
303
 
304
  def main() -> None:
305
  """Main application function."""
306
+ st.sidebar.title("Infinite Dataset Hub")
307
 
308
+ # Additional UI elements and logic for your Streamlit app
309
+ # ...
310
 
311
+ st.sidebar.subheader("Available URLs")
312
+ for name, url in URLS.items():
313
+ if st.sidebar.button(name):
314
+ st.write(f"Downloading from {url}")
315
+ download_html_and_files(url, name.replace(" ", "_"))
316
 
317
+ # Initialize history
318
+ initialize_history()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
 
320
  if __name__ == "__main__":
321
  main()