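# NOTE: the next three lines appear to be page-header residue captured along
# with the file (uploader avatar text, commit title, commit id); kept as comments.
# kasimali's picture
# Upload code from Colab
# 3ce7303 verified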
# Whisper Malayalam Fine-tuning Code
# Extracted from Colab session
# Cell 1
# ============================================================
# PUSH TRAINED MODEL TO HF HUB - FIXED!
# ============================================================
# Loads the fine-tuned Whisper model and its processor from a local folder
# and pushes both to a single model repository on the Hugging Face Hub.
from huggingface_hub import notebook_login
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# 1) Login
notebook_login()

# 2) Your details
YOUR_USERNAME = "kasimali"
MODEL_NAME = "whisper-small-malayalam"
# Both pushes and the final link target the same repo, so build the id once.
repo_id = f"{YOUR_USERNAME}/{MODEL_NAME}"
# Local checkpoint directory shared by the model and the processor.
local_dir = "./whisper-small-ml-final"

# 3) Load from LOCAL folder (not repo!)
print("πŸ“¦ Loading model from local folder...")
# local_files_only=True keeps from_pretrained from treating the path as a Hub repo id.
model = WhisperForConditionalGeneration.from_pretrained(local_dir, local_files_only=True)
processor = WhisperProcessor.from_pretrained(local_dir, local_files_only=True)
print("βœ… Model loaded!")

# 4) Push to HF Hub
print(f"πŸ“€ Pushing to {repo_id}...")
# `use_auth_token` is deprecated in recent transformers releases; with no
# explicit token the credentials stored by notebook_login() are used.
model.push_to_hub(repo_id)
processor.push_to_hub(repo_id)
print("βœ… DONE!")
print(f"πŸ”— Your model: https://huggingface.co/{repo_id}")
# Cell 2
# ============================================================
# UPLOAD MODEL FOLDER DIRECTLY TO HF HUB
# ============================================================
# Creates the model repo if needed and uploads the whole local checkpoint
# folder to it in one call.
from huggingface_hub import HfApi, create_repo, notebook_login
import os

# 1) Login
notebook_login()

# 2) Config
YOUR_USERNAME = "kasimali"
MODEL_NAME = "whisper-small-malayalam"
LOCAL_FOLDER = "whisper-small-ml-final"  # Without ./
repo_id = f"{YOUR_USERNAME}/{MODEL_NAME}"

# 3) Create repo on HF Hub
api = HfApi()
print(f"πŸ“¦ Creating repo: {repo_id}...")
# exist_ok=True makes an already-existing repo a no-op, so anything that
# still raises here is a real failure (auth, network, permissions) and is
# allowed to surface instead of being reported as "might already exist".
create_repo(
    repo_id=repo_id,
    repo_type="model",
    private=False,
    exist_ok=True,
)
# The repo may have existed already, so the message no longer claims creation.
print("βœ… Repo ready!")

# 4) Upload entire folder
print(f"πŸ“€ Uploading {LOCAL_FOLDER} to Hub...")
api.upload_folder(
    folder_path=LOCAL_FOLDER,
    repo_id=repo_id,
    repo_type="model",
)
print("\nβœ… SUCCESS!")
print(f"πŸ”— Your model: https://huggingface.co/{repo_id}")
# Cell 3
# Diagnostic: list what is in /content and report which of the expected
# model output folders are present.
import os

print("πŸ“ Files in /content:")
for entry in os.listdir("/content"):
    print(f" - {entry}")

print("\nπŸ“ Looking for model folders...")
# Candidate save locations, checked in this order (paths are resolved
# relative to the current working directory).
possible_folders = [
    "whisper-small-ml",
    "whisper-small-ml-final",
    "./whisper-small-ml",
    "./whisper-small-ml-final"
]
for candidate in possible_folders:
    if not os.path.exists(candidate):
        print(f"❌ Not found: {candidate}")
        continue
    print(f"βœ… Found: {candidate}")
    preview = os.listdir(candidate)[:5]  # only the first 5 entries
    print(f" Contents: {preview}")
# Cell 4
# Creates a Gradio Space, uploads a small placeholder app.py to it, then
# uploads the training notebook alongside it.
import os

from huggingface_hub import HfApi, create_repo, notebook_login

notebook_login()

YOUR_USERNAME = "kasimali"
SPACE_NAME = "malayalam-whisper-finetuning"
space_id = f"{YOUR_USERNAME}/{SPACE_NAME}"
notebook_file = "finetunning.ipynb"

# Create Space (exist_ok=True so re-running the cell does not fail once the
# Space exists)
create_repo(
    repo_id=space_id,
    repo_type="space",
    space_sdk="gradio",
    private=False,
    exist_ok=True,
)

# Create app.py that displays notebook.
# This literal is written out verbatim as a Python file, so the function
# body and call arguments must be indented for the generated app to run.
app_code = '''import gradio as gr
# Simple viewer
def show_notebook():
    return "Upload finetunning.ipynb to this Space to share your training code!"
demo = gr.Interface(
    fn=show_notebook,
    inputs=None,
    outputs=gr.Textbox(),
    title="Malayalam Whisper Fine-tuning Notebook"
)
demo.launch()
'''

# Write and upload
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

api = HfApi()
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=space_id,
    repo_type="space"
)

# Upload your notebook
# api.upload_file() reads the notebook from the runtime's disk. If it is not
# there yet, ask for it via files.upload() (browser -> runtime);
# files.download() goes the other way and cannot provide it.
if not os.path.exists(notebook_file):
    from google.colab import files

    print(f"{notebook_file} not found in {os.getcwd()} - please select it to upload.")
    files.upload()
    if not os.path.exists(notebook_file):
        raise FileNotFoundError(
            f"Expected a file named {notebook_file!r} after the upload prompt"
        )

api.upload_file(
    path_or_fileobj=notebook_file,
    path_in_repo=notebook_file,
    repo_id=space_id,
    repo_type="space"
)
print(f"βœ… Space: https://huggingface.co/spaces/{space_id}")
# Cell 5
# Creates a static Space, saves the currently open Colab notebook to disk,
# and uploads the notebook plus a README to the Space.
from huggingface_hub import HfApi, create_repo, notebook_login

# Login
notebook_login()

# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-finetuning"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Create Space
# exist_ok=True covers the "already exists" case; any other failure (auth,
# network) is raised instead of being reported as "Space already exists".
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print(f"Space ready: {SPACE_ID}")

# Get notebook from Colab
import json
from google.colab import _message

response = _message.blocking_request('get_ipynb', timeout_sec=10)
# NOTE(review): the reply appears to wrap the notebook document under an
# "ipynb" key; unwrap it when present so the saved file is a notebook rather
# than the wrapper. Falls back to the raw reply otherwise - confirm in Colab.
if isinstance(response, dict) and "ipynb" in response:
    notebook = response["ipynb"]
else:
    notebook = response
if not notebook:
    raise RuntimeError("Colab returned no notebook content for 'get_ipynb'")
notebook_content = json.dumps(notebook, indent=2)

# Save notebook locally
with open("FINETUNINNG.ipynb", "w", encoding="utf-8") as f:
    f.write(notebook_content)

# Upload to Space
api = HfApi()
api.upload_file(
    path_or_fileobj="FINETUNINNG.ipynb",
    path_in_repo="FINETUNINNG.ipynb",
    repo_id=SPACE_ID,
    repo_type="space"
)

# Create README (the leading --- block is the Space's metadata header)
readme = """---
title: Whisper Malayalam Fine-tuning
emoji: 🎀
colorFrom: blue
colorTo: green
sdk: static
---
# Whisper Malayalam Fine-tuning
This Space contains the training notebook for fine-tuning Whisper on Malayalam language.
## Files
- FINETUNINNG.ipynb: Training notebook
## Usage
Download the notebook and run it in Google Colab or Jupyter.
"""
# The README contains non-ASCII text, so the encoding is explicit.
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme)

api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space"
)
print(f"Done. Visit: https://huggingface.co/spaces/{SPACE_ID}")
# Cell 6
# Finds every .ipynb under /content and publishes each one as its own static
# Space (notebook file plus a generated README). A failure on one notebook is
# reported and does not stop the remaining ones.
from huggingface_hub import HfApi, create_repo, notebook_login
import os
import glob
import re


def _space_name_for(notebook_name):
    """Return a lowercase slug derived from *notebook_name* for use as a Space name.

    Every run of characters other than a-z, 0-9 and '.' becomes one '-',
    repeated dots are collapsed, and leading/trailing '-'/'.' are removed.
    NOTE(review): these rules and the 96-character cap follow the Hub's
    repo-name restrictions as I understand them - confirm against
    huggingface_hub's repo id validation.
    """
    slug = re.sub(r"[^a-z0-9.]+", "-", notebook_name.lower())
    slug = re.sub(r"\.{2,}", ".", slug)
    slug = slug.strip("-.")[:96].strip("-.")
    # A name made only of separators would otherwise yield an empty slug.
    return slug or "notebook"


def _build_readme(notebook_name, notebook_file):
    """Return the README text (metadata header + description) for one notebook Space."""
    return f"""---
title: {notebook_name}
emoji: πŸ““
colorFrom: blue
colorTo: green
sdk: static
---
# {notebook_name}
This Space contains the notebook: **{notebook_file}**
## Usage
Download the notebook and run it in Google Colab or Jupyter.
## Files
- {notebook_file}
"""


# Login once
notebook_login()

# Configuration
USERNAME = "kasimali"

# Find all notebook files
print("Searching for notebook files...")
notebook_files = []
# Search in common locations
search_paths = [
    "/content/*.ipynb",
    "/content/**/*.ipynb",
]
for pattern in search_paths:
    notebook_files.extend(glob.glob(pattern, recursive=True))

# Remove duplicates (both patterns can match the same file); sorting keeps
# the numbering and processing order stable between runs.
notebook_files = sorted(set(notebook_files))

print(f"\nFound {len(notebook_files)} notebook(s):")
for i, nb in enumerate(notebook_files):
    print(f"{i+1}. {nb}")

if not notebook_files:
    print("\nNo notebooks found! Make sure you have .ipynb files in /content/")
else:
    print(f"\nCreating {len(notebook_files)} separate Spaces...")
    api = HfApi()
    created_spaces = []

    for notebook_path in notebook_files:
        notebook_file = os.path.basename(notebook_path)
        # Drop only the trailing extension; a ".ipynb" occurring elsewhere in
        # the name is part of the name.
        notebook_name = os.path.splitext(notebook_file)[0]
        # Clean name for Space
        space_name = _space_name_for(notebook_name)
        space_id = f"{USERNAME}/{space_name}"

        print(f"\n{'='*60}")
        print(f"Processing: {notebook_name}")
        print(f"Space ID: {space_id}")

        try:
            # Create Space
            create_repo(
                repo_id=space_id,
                repo_type="space",
                space_sdk="static",
                exist_ok=True
            )
            print("βœ… Space created/exists")

            # Upload notebook
            api.upload_file(
                path_or_fileobj=notebook_path,
                path_in_repo=notebook_file,
                repo_id=space_id,
                repo_type="space",
                commit_message=f"Upload {notebook_name}"
            )
            print("βœ… Notebook uploaded")

            # Create and upload README; it is passed as bytes so no
            # temporary file is needed.
            readme = _build_readme(notebook_name, notebook_file)
            api.upload_file(
                path_or_fileobj=readme.encode("utf-8"),
                path_in_repo="README.md",
                repo_id=space_id,
                repo_type="space",
                commit_message="Add README"
            )
            print("βœ… README added")

            created_spaces.append({
                'name': notebook_name,
                'url': f"https://huggingface.co/spaces/{space_id}"
            })
        except Exception as e:
            # Deliberately best-effort: report and move on to the next notebook.
            print(f"❌ Error: {e}")

    # Summary
    print(f"\n{'='*60}")
    print(f"SUMMARY: Created {len(created_spaces)} Spaces")
    print(f"{'='*60}")
    for space in created_spaces:
        print(f"\nπŸ““ {space['name']}")
        print(f" πŸ”— {space['url']}")
    print("\nβœ… All done!")
# Cell 7
# Collects every non-empty input cell of the running IPython session into
# app.py and uploads it, with requirements.txt and a README, to a static Space.
from huggingface_hub import HfApi, create_repo, notebook_login
import json

# Login
notebook_login()

# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-code"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Get all code from current session
print("Extracting code from Colab session...")
# Get execution history
from IPython import get_ipython

ipython = get_ipython()
if ipython is None:
    # get_ipython() returns None when no interactive shell is running.
    raise RuntimeError("No active IPython session; run this cell inside a notebook")

# Get all executed code: the 'In' entry of the user namespace holds the
# inputs indexed by execution number; empty entries are skipped.
all_code = []
for i, cell in enumerate(ipython.user_ns.get('In', [])):
    if cell and cell.strip():
        all_code.append(f"# Cell {i}\n{cell}\n")

# Combine all code
full_code = "\n\n".join(all_code)

# Save as Python file
print("Creating app.py...")
app_content = f"""# Whisper Malayalam Fine-tuning Code
# Extracted from Colab session
{full_code}
"""
# Cell sources may contain non-ASCII text, so the encoding is explicit.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_content)
print(f"βœ… Created app.py ({len(full_code)} characters)")

# Create Space
print(f"\nCreating Space: {SPACE_ID}...")
# exist_ok=True already covers an existing Space, so any exception here is a
# real failure and is allowed to propagate before the uploads are attempted.
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print("βœ… Space created")

# Upload
api = HfApi()
print("\nUploading app.py...")
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Upload code from Colab"
)

# Create requirements.txt
requirements = """datasets==3.1.0
transformers
accelerate
evaluate
jiwer
torch
"""
with open("requirements.txt", "w", encoding="utf-8") as f:
    f.write(requirements)

api.upload_file(
    path_or_fileobj="requirements.txt",
    path_in_repo="requirements.txt",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add requirements"
)

# Create README
# The literal is closed explicitly; left open, it would absorb all the code
# that follows this cell. The Usage lines match the README written by Cell 8.
readme = """---
title: Whisper Malayalam Fine-tuning Code
emoji: 🎀
colorFrom: blue
colorTo: green
sdk: static
---
# Whisper Malayalam Fine-tuning Code
This Space contains the Python code for fine-tuning Whisper on Malayalam.
## Files
- `app.py`: Main training code (extracted from Colab)
- `requirements.txt`: Python dependencies
## Usage
pip install -r requirements.txt
python app.py
"""
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme)

api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add README"
)
# Cell 8
# Collects every non-empty input cell of the running IPython session into
# app.py and uploads it, with requirements.txt and a README, to a static Space.
from huggingface_hub import HfApi, create_repo, notebook_login

# Login
notebook_login()

# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-code"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Get all code from current session
print("Extracting code from Colab session...")
from IPython import get_ipython

ipython = get_ipython()
if ipython is None:
    # get_ipython() returns None when no interactive shell is running.
    raise RuntimeError("No active IPython session; run this cell inside a notebook")

# Get all executed code: the 'In' entry of the user namespace holds the
# inputs indexed by execution number; empty entries are skipped.
all_code = [
    f"# Cell {i}\n{cell}\n"
    for i, cell in enumerate(ipython.user_ns.get('In', []))
    if cell and cell.strip()
]

# Combine all code
full_code = "\n\n".join(all_code)

# Save as Python file
print("Creating app.py...")
app_content = (
    "# Whisper Malayalam Fine-tuning Code\n"
    "# Extracted from Colab session\n\n"
    + full_code
)
# Cell sources may contain non-ASCII text, so the encoding is explicit.
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_content)
print(f"Created app.py ({len(full_code)} characters)")

# Create Space
print(f"\nCreating Space: {SPACE_ID}...")
# exist_ok=True already covers an existing Space, so anything raised here is
# a real failure (auth, network) and must not be reported as "already exists".
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print("Space created")

# Upload
api = HfApi()
print("\nUploading app.py...")
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Upload code from Colab"
)

# Create requirements.txt
requirements = "datasets==3.1.0\ntransformers\naccelerate\nevaluate\njiwer\ntorch\n"
with open("requirements.txt", "w", encoding="utf-8") as f:
    f.write(requirements)

api.upload_file(
    path_or_fileobj="requirements.txt",
    path_in_repo="requirements.txt",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add requirements"
)

# Create README: one list entry per line, "" for a blank line; the trailing
# "" gives the file its final newline.
readme_content = "\n".join([
    "---",
    "title: Whisper Malayalam Code",
    "emoji: 🎀",
    "colorFrom: blue",
    "colorTo: green",
    "sdk: static",
    "---",
    "",
    "# Whisper Malayalam Fine-tuning Code",
    "",
    "Python code for fine-tuning Whisper on Malayalam.",
    "",
    "## Files",
    "- app.py: Main training code",
    "- requirements.txt: Dependencies",
    "",
    "## Usage",
    "pip install -r requirements.txt",
    "python app.py",
    "",
])
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme_content)

api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add README"
)

print("\nSUCCESS!")
print("Uploaded: app.py, requirements.txt, README.md")
print(f"\nView at: https://huggingface.co/spaces/{SPACE_ID}")