#!/usr/bin/env python3
"""Simple Hugging Face upload script for the Bengali AI model.

Writes a model card into the local model directory, then pushes the whole
directory to the Hugging Face Hub repository ``megharudushi/Sheikh``.
Authentication is taken from the environment (HF_TOKEN) or a prior
``huggingface-cli login``.
"""
import os

from huggingface_hub import HfApi, create_repo, upload_folder


def upload_model_simple():
    """Upload the local model directory to the Hugging Face Hub.

    Uses environment-based authentication (HF_TOKEN or cached CLI login).

    Returns:
        bool: True when the upload succeeded; False when the local model
        directory is missing, authentication fails, or any upload error
        occurs (the error is printed, never raised to the caller).
    """
    print("🚀 Uploading Bengali AI to Hugging Face Hub...")
    print("=" * 50)

    # Initialize API client (picks up HF_TOKEN / cached credentials).
    api = HfApi()

    # Repository details
    repo_id = "megharudushi/Sheikh"
    local_dir = "./ready_bengali_ai"

    # Guard clause: nothing to upload without the prepared model directory.
    if not os.path.exists(local_dir):
        print(f"❌ Directory {local_dir} not found!")
        return False

    try:
        # Fail early with a clear message when no token is configured.
        print("🔑 Checking Hugging Face authentication...")
        user_info = api.whoami()
        print(f"✅ Authenticated as: {user_info['name']}")

        # Create (or reuse) the target repository.
        print(f"🔗 Creating repository: {repo_id}")
        repo_url = create_repo(
            repo_id=repo_id,
            exist_ok=True,
            repo_type="model",
        )
        print(f"✅ Repository ready: {repo_url}")

        # Upload every file in the model directory in one commit.
        print(f"📤 Uploading {len(os.listdir(local_dir))} files...")
        upload_folder(
            folder_path=local_dir,
            repo_id=repo_id,
            commit_message="Complete Bengali AI model with tokenizer and configs",
        )

        print("🎉 Upload successful!")
        print(f"🌐 Your model: https://huggingface.co/{repo_id}")
        return True

    except Exception as e:  # broad on purpose: top-level boundary, reported to user
        print(f"❌ Upload failed: {e}")
        if "authentication" in str(e).lower():
            print("\n🔧 Authentication Issue:")
            print("1. Set your HF token: export HF_TOKEN=your_token_here")
            print("2. Or use: huggingface-cli login")
            # BUG FIX: HfApi has no login() method; the programmatic login
            # entry point is the module-level huggingface_hub.login().
            print("3. Or set token in code: huggingface_hub.login()")
        return False


if __name__ == "__main__":
    # Create model card content (written verbatim into README.md).
    readme_content = """# Bengali AI Model - Sheikh

A Bengali (Bangla) language AI model for instruction following and text generation.

## Model Details

- **Base**: microsoft/DialoGPT-medium
- **Language**: Bengali (Bangla)
- **Parameters**: 355M
- **Size**: 1.4GB
- **Data**: Alpaca Bangla dataset

## Usage

```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("megharudushi/Sheikh")
model = AutoModelForCausalLM.from_pretrained("megharudushi/Sheikh")

# Bengali query
input_text = "বাংলাদেশের রাজধানী কী?"
inputs = tokenizer.encode(input_text, return_tensors="pt")
outputs = model.generate(inputs, max_length=150)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

## Features

- Bengali language understanding
- Educational content generation
- Cultural knowledge responses
- General conversation support
"""

    # BUG FIX: the original wrote the README unconditionally, so a missing
    # model directory crashed here with FileNotFoundError before the script
    # could reach the friendly "Directory not found" message. Guard the write
    # and let upload_model_simple() report the missing directory.
    local_dir = "./ready_bengali_ai"
    if os.path.isdir(local_dir):
        with open(os.path.join(local_dir, "README.md"), "w", encoding="utf-8") as f:
            f.write(readme_content)
        print("📄 Model card created")

    # Attempt upload
    success = upload_model_simple()

    if success:
        print("\n🎉 SUCCESS! Your Bengali AI is now on Hugging Face!")
        print("Repository: https://huggingface.co/megharudushi/Sheikh")
    else:
        print("\n⚠️ Upload failed. Check authentication and try again.")