"""Interactive driver that runs the data-updating scripts, then uploads results.

Flow, top to bottom:

1. Ask on stdin whether new data should be pulled from LegiScan.
2. Run the selected scripts one by one as child Python processes, asking
   whether to carry on whenever one exits with a non-zero return code.
3. Call ``upload_all_to_huggingface()`` and report the outcome.

The process exits with status 1 on an unrecognised first answer, or when the
user declines to continue after a failed script.
"""
import subprocess
import sys

from huggingface_upload import upload_all_to_huggingface

# Scripts in execution order.  Answering "no" to the pull prompt drops the
# first two entries (get_data.py and fix_pdf_bills.py) and runs the rest.
all_scripts = [
    "data_updating_scripts/get_data.py",
    "data_updating_scripts/fix_pdf_bills.py",
    "data_updating_scripts/known_bills_status.py",
    "data_updating_scripts/migrate_iapp_categories.py",
    "data_updating_scripts/mark_no_text_bills.py",
    "data_updating_scripts/generate_summaries.py",
    "data_updating_scripts/generate_suggested_questions.py",
    "data_updating_scripts/generate_reports.py",
    "data_updating_scripts/eu_vectorstore.py",
]

# Horizontal rule printed between sections of the console output.
_DIVIDER = "=" * 50

# Maps each accepted (already normalised) answer to "pull new data?".
_PULL_ANSWERS = {"y": True, "yes": True, "n": False, "no": False}


def _read_answer():
    """Read one line from stdin, trimmed of surrounding whitespace and lowercased."""
    return input().strip().lower()


# --- Step 1: decide which scripts to run ---------------------------------
print("Do you want to pull new data from LegiScan?")
print("Enter 'y' or 'yes' to pull new data, or 'n' or 'no' to skip and use existing data:")
answer = _read_answer()

pull_new_data = _PULL_ANSWERS.get(answer)
if pull_new_data is None:
    # Anything other than y/yes/n/no aborts immediately.
    print(f"\n✗ Invalid response '{answer}'. Please run the script again and enter 'y' or 'n'.")
    sys.exit(1)

if pull_new_data:
    print("\n✓ Will pull new data from LegiScan")
    scripts_to_run = all_scripts
else:
    print("\n✓ Skipping data pull, using existing data")
    scripts_to_run = all_scripts[2:]

print(f"\nWill run {len(scripts_to_run)} scripts:")
for script_path in scripts_to_run:
    print(f" - {script_path}")
print(f"\n{_DIVIDER}")

# --- Step 2: run each script with the current interpreter ----------------
for script_path in scripts_to_run:
    print(f"\n--- Running {script_path} ---")
    print(_DIVIDER)

    exit_code = subprocess.run([sys.executable, script_path]).returncode
    if exit_code == 0:
        print(f"✓ {script_path} completed successfully")
        continue

    # Non-zero exit: let the user choose between carrying on and aborting.
    print(f"\n✗ Script {script_path} failed with return code {exit_code}")
    print("Do you want to continue with the remaining scripts? (y/n):")
    if _read_answer() not in ("y", "yes"):
        print("Stopping pipeline execution.")
        sys.exit(1)

print(f"\n{_DIVIDER}")
print("✓ Pipeline execution completed!")

# --- Step 3: upload; failures are reported but do not change the exit status
print("\nUploading all JSON datasets to HuggingFace…")
try:
    upload_all_to_huggingface()
    print("✓ HuggingFace upload complete!")
except KeyError as err:
    # Reported to the user as a configuration problem.
    print(f"✗ HuggingFace config error: {err}")
except Exception as err:
    print(f"✗ Upload failed: {err}")