|
|
|
|
|
""" |
|
|
Simple Hugging Face upload script for Bengali AI model |
|
|
""" |
|
|
|
|
|
import os |
|
|
from huggingface_hub import HfApi, create_repo, upload_folder |
|
|
|
|
|
def upload_model_simple():
    """Upload the local Bengali AI model folder to the Hugging Face Hub.

    Relies on ambient authentication (HF_TOKEN environment variable or a
    prior ``huggingface-cli login``); no token is passed explicitly.

    Returns:
        bool: True when the upload completes; False when the local model
        directory is missing or any Hub operation raises.
    """
    print("🚀 Uploading Bengali AI to Hugging Face Hub...")
    print("=" * 50)

    api = HfApi()

    # Target repository on the Hub and the local folder holding the
    # model weights, tokenizer, and config files.
    repo_id = "megharudushi/Sheikh"
    local_dir = "./ready_bengali_ai"

    if not os.path.exists(local_dir):
        print(f"❌ Directory {local_dir} not found!")
        return False

    try:
        # Fail fast on bad credentials before any network-heavy work.
        print("🔑 Checking Hugging Face authentication...")
        user_info = api.whoami()
        print(f"✅ Authenticated as: {user_info['name']}")

        # exist_ok=True makes the script idempotent across repeated runs.
        print(f"🔗 Creating repository: {repo_id}")
        repo_url = create_repo(
            repo_id=repo_id,
            exist_ok=True,
            repo_type="model"
        )
        print(f"✅ Repository ready: {repo_url}")

        print(f"📤 Uploading {len(os.listdir(local_dir))} files...")
        upload_folder(
            folder_path=local_dir,
            repo_id=repo_id,
            commit_message="Complete Bengali AI model with tokenizer and configs"
        )

        print("🎉 Upload successful!")
        print(f"🌐 Your model: https://huggingface.co/{repo_id}")

        return True

    except Exception as e:
        # Broad catch is deliberate at this top-level boundary: report the
        # failure and return a flag instead of crashing the script.
        print(f"❌ Upload failed: {e}")

        if "authentication" in str(e).lower():
            print("\n🔧 Authentication Issue:")
            print("1. Set your HF token: export HF_TOKEN=your_token_here")
            print("2. Or use: huggingface-cli login")
            # Fixed hint: HfApi has no .login() method; the login helper
            # is a module-level function in huggingface_hub.
            print("3. Or in code: from huggingface_hub import login; login()")

        return False
|
|
|
|
|
if __name__ == "__main__":

    model_dir = "./ready_bengali_ai"

    readme_content = """# Bengali AI Model - Sheikh

A Bengali (Bangla) language AI model for instruction following and text generation.

## Model Details
- **Base**: microsoft/DialoGPT-medium
- **Language**: Bengali (Bangla)
- **Parameters**: 355M
- **Size**: 1.4GB
- **Data**: Alpaca Bangla dataset

## Usage
```python
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("megharudushi/Sheikh")
model = AutoModelForCausalLM.from_pretrained("megharudushi/Sheikh")

# Bengali query
input_text = "বাংলাদেশের রাজধানী কী?"
inputs = tokenizer.encode(input_text, return_tensors="pt")
outputs = model.generate(inputs, max_length=150)
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```

## Features
- Bengali language understanding
- Educational content generation
- Cultural knowledge responses
- General conversation support
"""

    # Only write the model card when the model folder exists; an
    # unconditional open() would raise FileNotFoundError here and crash
    # before upload_model_simple() can print its friendly error message.
    if os.path.isdir(model_dir):
        with open(os.path.join(model_dir, "README.md"), "w", encoding="utf-8") as f:
            f.write(readme_content)
        print("📄 Model card created")

    success = upload_model_simple()

    if success:
        print("\n🎉 SUCCESS! Your Bengali AI is now on Hugging Face!")
        print("Repository: https://huggingface.co/megharudushi/Sheikh")
    else:
        print("\n⚠️ Upload failed. Check authentication and try again.")