| | |
"""
Sync the BitTransformerLM repository to the HuggingFace Hub for the open-source (OS) launch.
Uploads the cleaned documentation and code with a descriptive commit message.
"""
| |
|
import logging
import os
import sys
from pathlib import Path
from typing import Optional, List

from huggingface_hub import HfApi, login
| |
|
| | |
# Configure root logging once at import time so every step of the sync is
# reported with a timestamp and severity level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
| |
|
# Glob patterns, relative to the repository root, selecting what is published.
# NOTE(review): RELEASE_INFO.md (written by create_release_info) is not matched
# by any pattern below, so it is never uploaded - confirm whether that is
# intended.
_INCLUDE_PATTERNS = [
    # Source code
    "bit_transformer/**/*.py",
    "tests/**/*.py",
    "*.py",
    # Documentation
    "README.md",
    "MODEL_CARD.md",
    "RESEARCH_STATUS.md",
    "EMPIRICAL_VALIDATION.md",
    "OPEN_SOURCE_LAUNCH.md",
    "AGENTS.md",
    # Project configuration and container/startup files
    "requirements.txt",
    "pyproject.toml",
    "Dockerfile",
    "start.sh",
    # License texts
    "LICENSE/**/*.txt",
]

# Patterns for files that must never be published even if an include pattern
# selects them. A pattern ending in "/**" means "anything below a directory
# with this name" (see _is_excluded).
_EXCLUDE_PATTERNS = [
    # Bytecode, VCS metadata and test caches
    "__pycache__/**",
    "*.pyc",
    ".git/**",
    ".pytest_cache/**",
    # Model artifacts
    "weights/**",
    "checkpoints/**",
    # Log files
    "*.log",
    # Assessment / audit documents (presumably internal - verify)
    "BitTransformerLM_full_assessment.md",
    "FORENSIC_*.md",
    "state_of_the_repo_audit.md",
    # Separate upload script
    "upload_to_hf.py",
]

# Long-form description attached to every upload commit.
_COMMIT_DESCRIPTION = "\n".join([
    "This OS launch commit includes:",
    "",
    "✅ **Cleaned Documentation**",
    "- Removed inflated claims and marketing language",
    "- Added honest research status and limitations",
    "- Created professional model card and validation reports",
    "- Streamlined licensing to AGPLv3 + commercial contact",
    "",
    "✅ **Refined Codebase**",
    "- Complete experimental bit-native transformer implementation",
    "- 57 Python files with comprehensive research framework",
    "- Safety telemetry and monitoring systems",
    "- Distributed training and development tools",
    "",
    "✅ **Professional Standards**",
    "- Empirical validation of all claims",
    "- Clear experimental vs production distinctions",
    "- Rigorous research methodology requirements",
    "- Community contribution framework",
    "",
    "Ready for serious research evaluation and academic investigation.",
])


def _is_excluded(relative_path: Path, exclude_patterns: List[str]) -> bool:
    """Return True when *relative_path* matches any exclusion pattern.

    ``PurePath.match`` treats ``**`` like a single ``*``, so on its own a
    pattern such as ``"weights/**"`` only catches files sitting directly
    inside ``weights/``. Patterns ending in ``/**`` are therefore also
    checked against every parent directory name of the path, which excludes
    nested files at any depth.
    """
    parent_dirs = relative_path.parts[:-1]
    for pattern in exclude_patterns:
        if pattern.endswith("/**") and pattern[:-3] in parent_dirs:
            return True
        if relative_path.match(pattern):
            return True
    return False


def _collect_files(
    repo_root: Path,
    include_patterns: List[str],
    exclude_patterns: List[str],
) -> List[Path]:
    """Return the files under *repo_root* selected for upload, without duplicates."""
    selected: List[Path] = []
    seen = set()
    for pattern in include_patterns:
        # Sorted so the upload order is stable from run to run.
        for file_path in sorted(repo_root.glob(pattern)):
            if file_path in seen or not file_path.is_file():
                continue
            if _is_excluded(file_path.relative_to(repo_root), exclude_patterns):
                continue
            seen.add(file_path)
            selected.append(file_path)
    return selected


def sync_repository_to_hf(
    repo_id: str = "WCNegentropy/BitTransformerLM",
    token: Optional[str] = None,
    commit_message: str = "π OS Launch: Clean documentation and refined licensing",
) -> bool:
    """
    Sync the entire cleaned BitTransformerLM repository to HuggingFace Hub.

    Files are discovered relative to the directory containing this script and
    uploaded one at a time with ``HfApi.upload_file``; a failure on one file
    is logged and does not stop the remaining uploads.

    Args:
        repo_id: HuggingFace repository ID
        token: HF token (defaults to HF_TOKEN environment variable)
        commit_message: Commit message for the upload

    Returns:
        True only if authentication succeeded and every selected file was
        uploaded; False if no token is available, any upload failed, or an
        unexpected error aborted the sync.
    """
    if token is None:
        token = os.environ.get('HF_TOKEN')
    if not token:
        logger.error("HF_TOKEN environment variable not set and no token provided")
        return False

    try:
        login(token=token)
        api = HfApi()
        logger.info("Successfully authenticated with HuggingFace Hub")

        repo_root = Path(__file__).parent
        logger.info(f"Repository root: {repo_root}")

        files_to_upload = _collect_files(repo_root, _INCLUDE_PATTERNS, _EXCLUDE_PATTERNS)
        logger.info(f"Found {len(files_to_upload)} files to upload")

        uploaded_count = 0
        failed_paths: List[str] = []
        for file_path in files_to_upload:
            # Computed outside the try so the except handler can always name the file.
            relative_path = file_path.relative_to(repo_root)
            try:
                logger.info(f"Uploading: {relative_path}")
                api.upload_file(
                    path_or_fileobj=str(file_path),
                    # Repo paths use forward slashes regardless of the local OS.
                    path_in_repo=relative_path.as_posix(),
                    repo_id=repo_id,
                    repo_type="model",
                    commit_message=commit_message,
                    commit_description=_COMMIT_DESCRIPTION,
                )
            except Exception as e:
                # Best effort: record the failure and keep going with the rest.
                logger.warning(f"Failed to upload {relative_path}: {e}")
                failed_paths.append(relative_path.as_posix())
                continue

            uploaded_count += 1
            if uploaded_count % 10 == 0:
                logger.info(f"Progress: {uploaded_count}/{len(files_to_upload)} files uploaded")

        logger.info(f"✅ Successfully uploaded {uploaded_count}/{len(files_to_upload)} files")

        if failed_paths:
            # A partial sync must not be reported as success to the caller.
            logger.error(
                f"{len(failed_paths)} file(s) failed to upload: {', '.join(failed_paths)}"
            )
            return False

        logger.info(f"π Repository synced to: https://huggingface.co/{repo_id}")
        return True

    except Exception as e:
        logger.error(f"β Failed to sync repository: {e}")
        return False
| |
|
def create_release_info() -> Path:
    """Create a release information file for the OS launch.

    Writes ``RELEASE_INFO.md`` into the directory containing this script,
    overwriting any existing file of that name.

    Returns:
        Path of the file that was written.
    """
    release_info = """# BitTransformerLM v0.1.0 - Experimental Research Release

**Release Date:** August 2025
**Status:** Open Source Research Implementation
**License:** AGPLv3 + Commercial Licensing Available

## What's Included

This release provides a complete experimental framework for bit-native language modeling research:

- **Core Architecture:** 57 Python files implementing bit-native transformer with reversible layers
- **Safety Systems:** Real-time K/C/S telemetry and monitoring
- **Research Tools:** Interactive dashboard, distributed training, comprehensive testing
- **Documentation:** Professional model card, research status, and validation reports

## Important Notes

β οΈ **Experimental Status:** This is research code requiring rigorous baseline validation
β οΈ **Not Production Ready:** Needs extensive evaluation vs standard transformers
β οΈ **Research Use Only:** Intended for academic investigation and experimentation

## Licensing

- **Open Source:** AGPLv3 for research and open source use
- **Commercial:** Contact contact@wcnegentropy.com for commercial licensing

## Next Steps

The research community is invited to:
1. Conduct rigorous baseline comparisons vs standard transformers
2. Evaluate on established language modeling benchmarks
3. Validate (or refute) claimed memory efficiency benefits
4. Share findings openly to advance the field

**Research responsibly. Validate rigorously. Share openly.**
"""

    release_file = Path(__file__).parent / "RELEASE_INFO.md"
    # Explicit UTF-8: the text contains non-ASCII characters, which the
    # platform default encoding (e.g. cp1252 on Windows) cannot represent.
    with open(release_file, 'w', encoding='utf-8') as f:
        f.write(release_info)

    logger.info("Created RELEASE_INFO.md")
    return release_file
| |
|
if __name__ == "__main__":
    # Write the release notes file before starting the upload.
    create_release_info()

    # Run the sync with the default repository, token lookup and commit message.
    success = sync_repository_to_hf()

    if success:
        print("\nπ BitTransformerLM OS Launch Sync Complete!")
        print("π Repository: https://huggingface.co/WCNegentropy/BitTransformerLM")
        print("π§ Commercial inquiries: contact@wcnegentropy.com")
        print("\nReady for research community evaluation! π§ͺβ¨")
    else:
        print("\nβ Sync failed. Please check logs and try again.")
        # Non-zero exit status so shells and CI jobs can detect the failure.
        sys.exit(1)