# Whisper Malayalam Fine-tuning Code
# Extracted from Colab session
"""Publish a fine-tuned Whisper (Malayalam) model and its notebooks to the Hub.

This file is the input history of a Google Colab session, kept cell by cell
in execution order. Each "Cell N" section is one attempt at a publishing
step:

* Cells 1-2 push the locally saved model folder to a model repo.
* Cell 3 lists /content to locate the saved model folder.
* Cells 4-6 publish notebooks as Hugging Face Spaces.
* Cells 7-8 dump the session's code to ``app.py`` and publish it as a Space.

Later cells deliberately re-assign the configuration names of earlier ones.
Several cells only work inside Colab (``google.colab`` imports, /content).
"""

import glob
import json
import os
import re

from huggingface_hub import HfApi, create_repo, notebook_login
from transformers import WhisperForConditionalGeneration, WhisperProcessor


def _write_text(path, text):
    """Write *text* to *path* as UTF-8 (the generated READMEs contain emoji)."""
    with open(path, "w", encoding="utf-8") as handle:
        handle.write(text)


def _space_slug(notebook_name):
    """Derive a repo name from a notebook file stem.

    Lower-cases the name and collapses every run of characters other than
    ``a-z``, ``0-9`` and ``.`` into a single ``-``, so names containing
    parentheses, repeated separators or non-ASCII text still yield a usable
    repo id. Falls back to ``"notebook"`` when nothing usable is left.
    """
    slug = re.sub(r"[^a-z0-9.]+", "-", notebook_name.lower())
    slug = re.sub(r"\.{2,}", ".", slug)
    # The Hub limits repo names to 96 characters; re-strip after truncating.
    slug = slug.strip("-.")[:96].strip("-.")
    return slug or "notebook"


def _notebook_readme(notebook_name, notebook_file):
    """Build the Space README (YAML front matter + body) for one notebook."""
    return (
        "---\n"
        f"title: {notebook_name}\n"
        "emoji: 📓\n"
        "colorFrom: blue\n"
        "colorTo: green\n"
        "sdk: static\n"
        "---\n"
        "\n"
        f"# {notebook_name}\n"
        "\n"
        f"This Space contains the notebook: **{notebook_file}**\n"
        "\n"
        "## Usage\n"
        "Download the notebook and run it in Google Colab or Jupyter.\n"
        "\n"
        "## Files\n"
        f"- {notebook_file}\n"
    )


def _collect_session_code():
    """Return the non-empty IPython input cells, each prefixed with its index.

    Raises:
        RuntimeError: when not running inside an IPython/Colab kernel, so an
            empty ``app.py`` is never uploaded over an existing one.
    """
    # Imported here so the model-upload cells do not require IPython.
    from IPython import get_ipython

    shell = get_ipython()
    if shell is None:
        raise RuntimeError("No IPython session found; run this inside Colab/Jupyter.")
    history = shell.user_ns.get("In", [])
    return [
        f"# Cell {index}\n{cell}\n"
        for index, cell in enumerate(history)
        if cell and cell.strip()
    ]


# Login once for the whole session; the stored token is reused by every
# Hub call below (the original cells each repeated this call).
notebook_login()


# Cell 1
# ============================================================
# PUSH TRAINED MODEL TO HF HUB - FIXED!
# ============================================================

# 2) Your details
YOUR_USERNAME = "kasimali"
MODEL_NAME = "whisper-small-malayalam"

# 3) Load from LOCAL folder (not repo!)
print("📦 Loading model from local folder...")
model = WhisperForConditionalGeneration.from_pretrained(
    "./whisper-small-ml-final", local_files_only=True
)
processor = WhisperProcessor.from_pretrained(
    "./whisper-small-ml-final", local_files_only=True
)
print("✅ Model loaded!")

# 4) Push to HF Hub
# `use_auth_token` is deprecated; the token saved by notebook_login() is
# picked up automatically.
print(f"📤 Pushing to {YOUR_USERNAME}/{MODEL_NAME}...")
model.push_to_hub(f"{YOUR_USERNAME}/{MODEL_NAME}")
processor.push_to_hub(f"{YOUR_USERNAME}/{MODEL_NAME}")

print("✅ DONE!")
print(f"🔗 Your model: https://huggingface.co/{YOUR_USERNAME}/{MODEL_NAME}")


# Cell 2
# ============================================================
# UPLOAD MODEL FOLDER DIRECTLY TO HF HUB
# ============================================================

# 2) Config
YOUR_USERNAME = "kasimali"
MODEL_NAME = "whisper-small-malayalam"
LOCAL_FOLDER = "whisper-small-ml-final"  # Without ./

# 3) Create repo on HF Hub
api = HfApi()

print(f"📦 Creating repo: {YOUR_USERNAME}/{MODEL_NAME}...")
# exist_ok=True tolerates a pre-existing repo while still surfacing real
# failures (auth, network) instead of reporting them as "already exists".
create_repo(
    repo_id=f"{YOUR_USERNAME}/{MODEL_NAME}",
    repo_type="model",
    private=False,
    exist_ok=True,
)
print("✅ Repo created/exists")

# 4) Upload entire folder
print(f"📤 Uploading {LOCAL_FOLDER} to Hub...")
api.upload_folder(
    folder_path=LOCAL_FOLDER,
    repo_id=f"{YOUR_USERNAME}/{MODEL_NAME}",
    repo_type="model",
)

print("\n✅ SUCCESS!")
print(f"🔗 Your model: https://huggingface.co/{YOUR_USERNAME}/{MODEL_NAME}")


# Cell 3
# Check what folders/files exist
print("📁 Files in /content:")
for item in os.listdir("/content"):
    print(f" - {item}")

print("\n📁 Looking for model folders...")
# Check common save locations
possible_folders = [
    "whisper-small-ml",
    "whisper-small-ml-final",
    "./whisper-small-ml",
    "./whisper-small-ml-final",
]

for folder in possible_folders:
    if os.path.exists(folder):
        print(f"✅ Found: {folder}")
        print(f" Contents: {os.listdir(folder)[:5]}")  # Show first 5 files
    else:
        print(f"❌ Not found: {folder}")


# Cell 4
YOUR_USERNAME = "kasimali"
SPACE_NAME = "malayalam-whisper-finetuning"

# Create Space (exist_ok so the cell can be re-run)
create_repo(
    repo_id=f"{YOUR_USERNAME}/{SPACE_NAME}",
    repo_type="space",
    space_sdk="gradio",
    private=False,
    exist_ok=True,
)

# Create app.py that displays notebook
app_code = '''import gradio as gr

# Simple viewer
def show_notebook():
    return "Upload finetunning.ipynb to this Space to share your training code!"

demo = gr.Interface(
    fn=show_notebook,
    inputs=None,
    outputs=gr.Textbox(),
    title="Malayalam Whisper Fine-tuning Notebook"
)

demo.launch()
'''

# Write and upload
_write_text("app.py", app_code)

api = HfApi()
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=f"{YOUR_USERNAME}/{SPACE_NAME}",
    repo_type="space",
)

# Upload your notebook
# The original called files.download() here, which sends a file *to* the
# browser and does nothing to make the notebook available for the Hub
# upload. If the notebook is not on disk yet, ask the user for it instead.
NOTEBOOK_FILE = "finetunning.ipynb"
if not os.path.exists(NOTEBOOK_FILE):
    from google.colab import files  # Colab-only

    # NOTE(review): files.upload() is expected to save the chosen files into
    # the working directory under their own names - confirm on current Colab.
    files.upload()
if not os.path.exists(NOTEBOOK_FILE):
    raise FileNotFoundError(
        f"{NOTEBOOK_FILE} not found; upload it to the Colab working directory first."
    )

api.upload_file(
    path_or_fileobj=NOTEBOOK_FILE,
    path_in_repo=NOTEBOOK_FILE,
    repo_id=f"{YOUR_USERNAME}/{SPACE_NAME}",
    repo_type="space",
)

print(f"✅ Space: https://huggingface.co/spaces/{YOUR_USERNAME}/{SPACE_NAME}")


# Cell 5
# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-finetuning"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Create Space
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print(f"Space ready: {SPACE_ID}")

# Get notebook from Colab
from google.colab import _message  # Colab-only, private API

response = _message.blocking_request('get_ipynb', timeout_sec=10)
if not response:
    raise RuntimeError("Colab did not return the notebook (empty 'get_ipynb' response).")
# NOTE(review): the reply appears to wrap the notebook document under the
# "ipynb" key; unwrap it when present so the saved file is a valid .ipynb,
# otherwise fall back to the raw reply as the original code did.
if isinstance(response, dict) and "ipynb" in response:
    notebook = response["ipynb"]
else:
    notebook = response
notebook_content = json.dumps(notebook, indent=2)

# Save notebook locally
_write_text("FINETUNINNG.ipynb", notebook_content)

# Upload to Space
api = HfApi()
api.upload_file(
    path_or_fileobj="FINETUNINNG.ipynb",
    path_in_repo="FINETUNINNG.ipynb",
    repo_id=SPACE_ID,
    repo_type="space",
)

# Create README
readme = """---
title: Whisper Malayalam Fine-tuning
emoji: 🎤
colorFrom: blue
colorTo: green
sdk: static
---

# Whisper Malayalam Fine-tuning

This Space contains the training notebook for fine-tuning Whisper on Malayalam language.

## Files
- FINETUNINNG.ipynb: Training notebook

## Usage
Download the notebook and run it in Google Colab or Jupyter.
"""

_write_text("README.md", readme)

api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
)

print(f"Done. Visit: https://huggingface.co/spaces/{SPACE_ID}")


# Cell 6
# Configuration
USERNAME = "kasimali"

# Find all notebook files
print("Searching for notebook files...")
notebook_files = []

# Search in common locations
search_paths = [
    "/content/*.ipynb",
    "/content/**/*.ipynb",
]

for pattern in search_paths:
    notebook_files.extend(glob.glob(pattern, recursive=True))

# Remove duplicates (both patterns match top-level notebooks); sort so the
# processing order is the same on every run.
notebook_files = sorted(set(notebook_files))

print(f"\nFound {len(notebook_files)} notebook(s):")
for i, nb in enumerate(notebook_files):
    print(f"{i+1}. {nb}")

if not notebook_files:
    print("\nNo notebooks found! Make sure you have .ipynb files in /content/")
else:
    print(f"\nCreating {len(notebook_files)} separate Spaces...")

    api = HfApi()
    created_spaces = []

    for notebook_path in notebook_files:
        notebook_file = os.path.basename(notebook_path)
        # Get notebook name without extension
        notebook_name = os.path.splitext(notebook_file)[0]

        # Clean name for Space (remove special chars, lowercase)
        space_name = _space_slug(notebook_name)
        space_id = f"{USERNAME}/{space_name}"

        print(f"\n{'='*60}")
        print(f"Processing: {notebook_name}")
        print(f"Space ID: {space_id}")

        # Best effort per notebook: report the failure and continue with the
        # remaining notebooks.
        try:
            # Create Space
            create_repo(
                repo_id=space_id,
                repo_type="space",
                space_sdk="static",
                exist_ok=True,
            )
            print("✅ Space created/exists")

            # Upload notebook
            api.upload_file(
                path_or_fileobj=notebook_path,
                path_in_repo=notebook_file,
                repo_id=space_id,
                repo_type="space",
                commit_message=f"Upload {notebook_name}",
            )
            print("✅ Notebook uploaded")

            # Create README
            readme = _notebook_readme(notebook_name, notebook_file)

            # Save and upload README
            readme_path = f"/tmp/README_{space_name}.md"
            _write_text(readme_path, readme)

            api.upload_file(
                path_or_fileobj=readme_path,
                path_in_repo="README.md",
                repo_id=space_id,
                repo_type="space",
                commit_message="Add README",
            )
            print("✅ README added")

            created_spaces.append({
                'name': notebook_name,
                'url': f"https://huggingface.co/spaces/{space_id}",
            })

        except Exception as e:
            print(f"❌ Error: {e}")

    # Summary
    print(f"\n{'='*60}")
    print(f"SUMMARY: Created {len(created_spaces)} Spaces")
    print(f"{'='*60}")

    for space in created_spaces:
        print(f"\n📓 {space['name']}")
        print(f" 🔗 {space['url']}")

    print("\n✅ All done!")


# Cell 7
# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-code"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Get all code from current session
print("Extracting code from Colab session...")

# Get all executed code
all_code = _collect_session_code()

# Combine all code
full_code = "\n\n".join(all_code)

# Save as Python file
print("Creating app.py...")
app_content = f"""# Whisper Malayalam Fine-tuning Code
# Extracted from Colab session

{full_code}
"""

_write_text("app.py", app_content)

print(f"✅ Created app.py ({len(full_code)} characters)")

# Create Space
print(f"\nCreating Space: {SPACE_ID}...")
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print("✅ Space created")

# Upload
api = HfApi()

print("\nUploading app.py...")
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Upload code from Colab",
)

# Create requirements.txt
requirements = """datasets==3.1.0
transformers
accelerate
evaluate
jiwer
torch
"""

_write_text("requirements.txt", requirements)

api.upload_file(
    path_or_fileobj="requirements.txt",
    path_in_repo="requirements.txt",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add requirements",
)

# Create README
# NOTE: in the recorded session this cell was cut off right after the
# "## Usage" heading (unterminated string). The usage lines and the
# write/upload steps below are reconstructed from Cell 8, which is the
# re-run of this same cell.
readme = """---
title: Whisper Malayalam Fine-tuning Code
emoji: 🎤
colorFrom: blue
colorTo: green
sdk: static
---

# Whisper Malayalam Fine-tuning Code

This Space contains the Python code for fine-tuning Whisper on Malayalam.

## Files
- `app.py`: Main training code (extracted from Colab)
- `requirements.txt`: Python dependencies

## Usage
pip install -r requirements.txt
python app.py
"""

_write_text("README.md", readme)

api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add README",
)


# Cell 8
# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-code"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Get all code from current session
print("Extracting code from Colab session...")

# Get all executed code
all_code = _collect_session_code()

# Combine all code
full_code = "\n\n".join(all_code)

# Save as Python file
print("Creating app.py...")
app_content = "# Whisper Malayalam Fine-tuning Code\n"
app_content += "# Extracted from Colab session\n\n"
app_content += full_code

_write_text("app.py", app_content)

print(f"Created app.py ({len(full_code)} characters)")

# Create Space
print(f"\nCreating Space: {SPACE_ID}...")
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print("Space created")

# Upload
api = HfApi()

print("\nUploading app.py...")
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Upload code from Colab",
)

# Create requirements.txt
requirements = "datasets==3.1.0\ntransformers\naccelerate\nevaluate\njiwer\ntorch\n"

_write_text("requirements.txt", requirements)

api.upload_file(
    path_or_fileobj="requirements.txt",
    path_in_repo="requirements.txt",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add requirements",
)

# Create README
readme_content = "---\n"
readme_content += "title: Whisper Malayalam Code\n"
readme_content += "emoji: 🎤\n"
readme_content += "colorFrom: blue\n"
readme_content += "colorTo: green\n"
readme_content += "sdk: static\n"
readme_content += "---\n\n"
readme_content += "# Whisper Malayalam Fine-tuning Code\n\n"
readme_content += "Python code for fine-tuning Whisper on Malayalam.\n\n"
readme_content += "## Files\n"
readme_content += "- app.py: Main training code\n"
readme_content += "- requirements.txt: Dependencies\n\n"
readme_content += "## Usage\n"
readme_content += "pip install -r requirements.txt\n"
readme_content += "python app.py\n"

_write_text("README.md", readme_content)

api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add README",
)

print("\nSUCCESS!")
print("Uploaded: app.py, requirements.txt, README.md")
print(f"\nView at: https://huggingface.co/spaces/{SPACE_ID}")