# Source: hf_upload_simple.py (repo "Sheikh" by megharudushi)
# Uploaded via huggingface_hub — commit 7d3d63c (verified)
#!/usr/bin/env python3
"""
Simple Hugging Face upload script for Bengali AI model
"""
import os
from huggingface_hub import HfApi, create_repo, upload_folder
def upload_model_simple():
    """Upload the local Bengali AI model directory to the Hugging Face Hub.

    Verifies Hub authentication, creates (or reuses) the target model
    repository, and uploads every file in ``./ready_bengali_ai`` as a
    single commit.

    Returns:
        bool: True on successful upload; False if the local directory is
        missing or any Hub operation fails.
    """
    print("🚀 Uploading Bengali AI to Hugging Face Hub...")
    print("=" * 50)

    # Initialize API (token is picked up from the environment / HF cache).
    api = HfApi()

    # Repository details
    repo_id = "megharudushi/Sheikh"
    local_dir = "./ready_bengali_ai"

    # Fail fast if the prepared model directory is not present.
    if not os.path.exists(local_dir):
        print(f"❌ Directory {local_dir} not found!")
        return False

    try:
        # Check authentication — whoami() raises if no valid token is set.
        print("🔑 Checking Hugging Face authentication...")
        user_info = api.whoami()
        print(f"✅ Authenticated as: {user_info['name']}")

        # Create repository; exist_ok=True makes re-runs idempotent.
        print(f"🔗 Creating repository: {repo_id}")
        repo_url = create_repo(
            repo_id=repo_id,
            exist_ok=True,
            repo_type="model"
        )
        print(f"✅ Repository ready: {repo_url}")

        # Upload all files from the directory in one commit.
        print(f"📤 Uploading {len(os.listdir(local_dir))} files...")
        upload_folder(
            folder_path=local_dir,
            repo_id=repo_id,
            commit_message="Complete Bengali AI model with tokenizer and configs"
        )

        print("🎉 Upload successful!")
        print(f"🌐 Your model: https://huggingface.co/{repo_id}")
        return True

    except Exception as e:
        # Best-effort CLI tool: report the failure and return False rather
        # than crashing; give targeted hints for auth problems.
        print(f"❌ Upload failed: {e}")
        if "authentication" in str(e).lower():
            print("\n🔧 Authentication Issue:")
            print("1. Set your HF token: export HF_TOKEN=your_token_here")
            print("2. Or use: huggingface-cli login")
            # Fixed hint: HfApi has no login() method — the helper is the
            # module-level huggingface_hub.login().
            print("3. Or in code: from huggingface_hub import login; login()")
        return False
if __name__ == "__main__":
    # Model card content written alongside the model files before upload.
    readme_content = """# Bengali AI Model - Sheikh
A Bengali (Bangla) language AI model for instruction following and text generation.
## Model Details
- **Base**: microsoft/DialoGPT-medium
- **Language**: Bengali (Bangla)
- **Parameters**: 355M
- **Size**: 1.4GB
- **Data**: Alpaca Bangla dataset
## Usage
```python
from transformers import AutoTokenizer, AutoModelForCausalLM
tokenizer = AutoTokenizer.from_pretrained("megharudushi/Sheikh")
model = AutoModelForCausalLM.from_pretrained("megharudushi/Sheikh")
# Bengali query
input_text = "বাংলাদেশের রাজধানী কী?"
inputs = tokenizer.encode(input_text, return_tensors="pt")
outputs = model.generate(inputs, max_length=150)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```
## Features
- Bengali language understanding
- Educational content generation
- Cultural knowledge responses
- General conversation support
"""

    # Only write the model card if the model directory exists; otherwise
    # open() would raise FileNotFoundError here and we would never reach the
    # graceful "directory not found" handling in upload_model_simple().
    local_dir = "./ready_bengali_ai"
    if os.path.isdir(local_dir):
        with open(os.path.join(local_dir, "README.md"), "w", encoding="utf-8") as f:
            f.write(readme_content)
        print("📄 Model card created")

    # Attempt upload
    success = upload_model_simple()

    if success:
        print("\n🎉 SUCCESS! Your Bengali AI is now on Hugging Face!")
        print("Repository: https://huggingface.co/megharudushi/Sheikh")
    else:
        print("\n⚠️ Upload failed. Check authentication and try again.")