Spaces:
Running
Running
| # Whisper Malayalam Fine-tuning Code | |
| # Extracted from Colab session | |
# Cell 1
# ============================================================
# PUSH TRAINED MODEL TO HF HUB - FIXED!
# ============================================================
# Loads the fine-tuned Whisper checkpoint from a local directory and pushes
# the model and its processor to a single model repo on the Hub.
from huggingface_hub import notebook_login
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# 1) Login
notebook_login()

# 2) Your details
YOUR_USERNAME = "kasimali"
MODEL_NAME = "whisper-small-malayalam"
REPO_ID = f"{YOUR_USERNAME}/{MODEL_NAME}"
LOCAL_MODEL_DIR = "./whisper-small-ml-final"

# 3) Load from LOCAL folder (not repo!)
# local_files_only=True keeps from_pretrained from treating the path as a
# Hub repo id and trying to download it.
print("π¦ Loading model from local folder...")
model = WhisperForConditionalGeneration.from_pretrained(LOCAL_MODEL_DIR, local_files_only=True)
processor = WhisperProcessor.from_pretrained(LOCAL_MODEL_DIR, local_files_only=True)
print("β Model loaded!")

# 4) Push to HF Hub
# `token=True` uses the credentials saved by notebook_login(); it replaces the
# deprecated `use_auth_token=True` argument.
print(f"π€ Pushing to {REPO_ID}...")
model.push_to_hub(REPO_ID, token=True)
processor.push_to_hub(REPO_ID, token=True)
print("β DONE!")
print(f"π Your model: https://huggingface.co/{REPO_ID}")
# Cell 2
# ============================================================
# UPLOAD MODEL FOLDER DIRECTLY TO HF HUB
# ============================================================
# Uploads the saved model directory file-by-file, without loading the model
# into memory first.
from huggingface_hub import HfApi, create_repo, notebook_login
import os

# 1) Login
notebook_login()

# 2) Config
YOUR_USERNAME = "kasimali"
MODEL_NAME = "whisper-small-malayalam"
LOCAL_FOLDER = "whisper-small-ml-final"  # Without ./
REPO_ID = f"{YOUR_USERNAME}/{MODEL_NAME}"

# Fail before touching the Hub if there is nothing to upload, so a typo in
# LOCAL_FOLDER does not leave an empty public repo behind.
if not os.path.isdir(LOCAL_FOLDER):
    raise FileNotFoundError(f"Model folder not found: {LOCAL_FOLDER}")

# 3) Create repo on HF Hub
# exist_ok=True makes re-runs succeed when the repo is already there, while
# real failures (bad token, network) still raise instead of being reported
# as "might already exist".
api = HfApi()
print(f"π¦ Creating repo: {REPO_ID}...")
create_repo(
    repo_id=REPO_ID,
    repo_type="model",
    private=False,
    exist_ok=True,
)
print("β Repo created/exists")

# 4) Upload entire folder
print(f"π€ Uploading {LOCAL_FOLDER} to Hub...")
api.upload_folder(
    folder_path=LOCAL_FOLDER,
    repo_id=REPO_ID,
    repo_type="model",
)
print("\nβ SUCCESS!")
print(f"π Your model: https://huggingface.co/{REPO_ID}")
# Cell 3
# Diagnostic cell: print what is in /content, then report which of the
# expected model output folders exist relative to the working directory.
import os

print("π Files in /content:")
for entry in os.listdir("/content"):
    print(f" - {entry}")

print("\nπ Looking for model folders...")

# Common save locations: each folder name, bare and with a "./" prefix.
possible_folders = [
    prefix + name
    for prefix in ("", "./")
    for name in ("whisper-small-ml", "whisper-small-ml-final")
]

for candidate in possible_folders:
    if not os.path.exists(candidate):
        print(f"β Not found: {candidate}")
        continue
    print(f"β Found: {candidate}")
    preview = os.listdir(candidate)[:5]  # Show first 5 files
    print(f" Contents: {preview}")
# Cell 4
# Creates a Gradio Space, uploads a placeholder app.py to it, then uploads
# the training notebook alongside.
import os

from google.colab import files
from huggingface_hub import HfApi, create_repo, notebook_login

notebook_login()

YOUR_USERNAME = "kasimali"
SPACE_NAME = "malayalam-whisper-finetuning"
SPACE_ID = f"{YOUR_USERNAME}/{SPACE_NAME}"
NOTEBOOK_FILE = "finetunning.ipynb"

# Create Space (exist_ok=True so re-running the cell does not fail on the
# already-created repo)
create_repo(
    repo_id=SPACE_ID,
    repo_type="space",
    space_sdk="gradio",
    private=False,
    exist_ok=True,
)

# Create app.py that displays notebook
# The text below is written verbatim to app.py, so it must itself be valid
# Python (note the indented function body).
app_code = '''import gradio as gr
# Simple viewer
def show_notebook():
    return "Upload finetunning.ipynb to this Space to share your training code!"
demo = gr.Interface(
    fn=show_notebook,
    inputs=None,
    outputs=gr.Textbox(),
    title="Malayalam Whisper Fine-tuning Notebook"
)
demo.launch()
'''

# Write and upload
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_code)

api = HfApi()
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=SPACE_ID,
    repo_type="space",
)

# Upload your notebook
# The notebook has to be on this VM's disk for upload_file() to read it.
# files.download() goes the other way (VM -> browser), so when the file is
# missing ask for it through the Colab file picker instead.
if not os.path.exists(NOTEBOOK_FILE):
    print(f"{NOTEBOOK_FILE} not found in {os.getcwd()}; please select it to upload.")
    files.upload()
if not os.path.exists(NOTEBOOK_FILE):
    raise FileNotFoundError(
        f"{NOTEBOOK_FILE} is still missing; upload a file with exactly this name."
    )

api.upload_file(
    path_or_fileobj=NOTEBOOK_FILE,
    path_in_repo=NOTEBOOK_FILE,
    repo_id=SPACE_ID,
    repo_type="space",
)
print(f"β Space: https://huggingface.co/spaces/{SPACE_ID}")
# Cell 5
# Creates a static Space, saves the currently open Colab notebook to disk,
# and uploads the notebook plus a README to the Space.
import json

from google.colab import _message
from huggingface_hub import HfApi, create_repo, notebook_login

# Login
notebook_login()

# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-finetuning"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"
NOTEBOOK_FILE = "FINETUNINNG.ipynb"

# Create Space
# exist_ok=True covers the "already exists" case; any other failure (bad
# token, network) now raises instead of being hidden by a bare `except:`.
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print(f"Space ready: {SPACE_ID}")

# Get notebook from Colab
# NOTE(review): google.colab._message is a private module. The reply is
# assumed to wrap the notebook JSON under an "ipynb" key - confirm against the
# running Colab version. If the key is absent the raw reply is used as-is.
response = _message.blocking_request('get_ipynb', timeout_sec=10)
if not response:
    raise RuntimeError("Colab returned no notebook content (request may have timed out)")
if isinstance(response, dict):
    notebook = response.get("ipynb", response)
else:
    notebook = response
notebook_content = json.dumps(notebook, indent=2)

# Save notebook locally
with open(NOTEBOOK_FILE, "w", encoding="utf-8") as f:
    f.write(notebook_content)

# Upload to Space
api = HfApi()
api.upload_file(
    path_or_fileobj=NOTEBOOK_FILE,
    path_in_repo=NOTEBOOK_FILE,
    repo_id=SPACE_ID,
    repo_type="space",
)

# Create README (the leading block between the --- lines is the Space's
# YAML configuration header)
readme = """---
title: Whisper Malayalam Fine-tuning
emoji: π€
colorFrom: blue
colorTo: green
sdk: static
---
# Whisper Malayalam Fine-tuning
This Space contains the training notebook for fine-tuning Whisper on Malayalam language.
## Files
- FINETUNINNG.ipynb: Training notebook
## Usage
Download the notebook and run it in Google Colab or Jupyter.
"""
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme)

api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
)
print(f"Done. Visit: https://huggingface.co/spaces/{SPACE_ID}")
# Cell 6
# Finds every .ipynb under /content and publishes each one to its own static
# Space (notebook file + generated README). Failures are reported per
# notebook and do not stop the remaining uploads.
import glob
import os
import re

from huggingface_hub import HfApi, create_repo, notebook_login

# Login once
notebook_login()

# Configuration
USERNAME = "kasimali"

# Find all notebook files
print("Searching for notebook files...")

# Search in common locations
search_paths = [
    "/content/*.ipynb",
    "/content/**/*.ipynb",
]

# The two patterns overlap, so collect into a set; sort so the listing and
# processing order is the same on every run.
found_paths = set()
for pattern in search_paths:
    found_paths.update(glob.glob(pattern, recursive=True))
notebook_files = sorted(found_paths)

print(f"\nFound {len(notebook_files)} notebook(s):")
for position, nb in enumerate(notebook_files, start=1):
    print(f"{position}. {nb}")

if not notebook_files:
    print("\nNo notebooks found! Make sure you have .ipynb files in /content/")
else:
    print(f"\nCreating {len(notebook_files)} separate Spaces...")
    api = HfApi()
    created_spaces = []

    for notebook_path in notebook_files:
        file_name = os.path.basename(notebook_path)
        # splitext removes only the trailing extension; str.replace would also
        # drop ".ipynb" occurring in the middle of a name.
        notebook_name, _ = os.path.splitext(file_name)

        # Clean name for Space: lowercase, and turn every run of characters
        # other than letters, digits and dots into a single "-", so names such
        # as "Untitled (1)" still give a usable repo id.
        # NOTE: notebooks with the same file name in different folders map to
        # the same Space; the later one overwrites the earlier upload.
        space_name = re.sub(r"[^a-z0-9.]+", "-", notebook_name.lower()).strip("-.")
        if not space_name:
            space_name = "notebook"
        space_id = f"{USERNAME}/{space_name}"

        print(f"\n{'='*60}")
        print(f"Processing: {notebook_name}")
        print(f"Space ID: {space_id}")

        try:
            # Create Space
            create_repo(
                repo_id=space_id,
                repo_type="space",
                space_sdk="static",
                exist_ok=True,
            )
            print("β Space created/exists")

            # Upload notebook
            api.upload_file(
                path_or_fileobj=notebook_path,
                path_in_repo=file_name,
                repo_id=space_id,
                repo_type="space",
                commit_message=f"Upload {notebook_name}",
            )
            print("β Notebook uploaded")

            # Create README (YAML configuration header, then the page body)
            readme = "\n".join([
                "---",
                f"title: {notebook_name}",
                "emoji: π",
                "colorFrom: blue",
                "colorTo: green",
                "sdk: static",
                "---",
                f"# {notebook_name}",
                f"This Space contains the notebook: **{file_name}**",
                "## Usage",
                "Download the notebook and run it in Google Colab or Jupyter.",
                "## Files",
                f"- {file_name}",
                "",
            ])

            # Save and upload README
            readme_path = f"/tmp/README_{space_name}.md"
            with open(readme_path, "w", encoding="utf-8") as f:
                f.write(readme)
            api.upload_file(
                path_or_fileobj=readme_path,
                path_in_repo="README.md",
                repo_id=space_id,
                repo_type="space",
                commit_message="Add README",
            )
            print("β README added")

            created_spaces.append({
                'name': notebook_name,
                'url': f"https://huggingface.co/spaces/{space_id}",
            })
        except Exception as e:
            # Best-effort batch: report the failure and move on to the next
            # notebook; only successes are listed in the summary below.
            print(f"β Error: {e}")

    # Summary
    print(f"\n{'='*60}")
    print(f"SUMMARY: Created {len(created_spaces)} Spaces")
    print(f"{'='*60}")
    for space in created_spaces:
        print(f"\nπ {space['name']}")
        print(f" π {space['url']}")
    print("\nβ All done!")
# Cell 7
# Dumps the source of every cell executed in this session into app.py and
# uploads it, with requirements.txt and a README, to a static Space.
# NOTE(review): the dump contains every executed cell verbatim (including this
# one), and the Space is created without private=True. Check the history for
# tokens or other secrets before running.
from huggingface_hub import HfApi, create_repo, notebook_login
import json

# Login
notebook_login()

# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-code"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Get all code from current session
print("Extracting code from Colab session...")

# Get execution history
from IPython import get_ipython
ipython = get_ipython()
if ipython is None:
    # get_ipython() returns None outside an IPython/Colab kernel.
    raise RuntimeError("No active IPython session; run this cell inside Colab or Jupyter.")

# Get all executed code: `In` is IPython's list of input cells; blank entries
# are skipped.
all_code = []
for i, cell in enumerate(ipython.user_ns.get('In', [])):
    if cell and cell.strip():
        all_code.append(f"# Cell {i}\n{cell}\n")

# Combine all code
full_code = "\n\n".join(all_code)

# Save as Python file
print("Creating app.py...")
app_content = f"""# Whisper Malayalam Fine-tuning Code
# Extracted from Colab session
{full_code}
"""
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_content)
print(f"β Created app.py ({len(full_code)} characters)")

# Create Space
# exist_ok=True already handles an existing Space, so no try/except is needed;
# anything that does raise here is a real error and should surface.
print(f"\nCreating Space: {SPACE_ID}...")
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print("β Space created")

# Upload
api = HfApi()
print("\nUploading app.py...")
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Upload code from Colab",
)

# Create requirements.txt
requirements = """datasets==3.1.0
transformers
accelerate
evaluate
jiwer
torch
"""
with open("requirements.txt", "w", encoding="utf-8") as f:
    f.write(requirements)
api.upload_file(
    path_or_fileobj="requirements.txt",
    path_in_repo="requirements.txt",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add requirements",
)

# Create README
# The string is closed here and then written and uploaded, so this cell
# finishes its own README step.
readme = """---
title: Whisper Malayalam Fine-tuning Code
emoji: π€
colorFrom: blue
colorTo: green
sdk: static
---
# Whisper Malayalam Fine-tuning Code
This Space contains the Python code for fine-tuning Whisper on Malayalam.
## Files
- `app.py`: Main training code (extracted from Colab)
- `requirements.txt`: Python dependencies
## Usage
pip install -r requirements.txt
python app.py
"""
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme)
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add README",
)
print(f"\nView at: https://huggingface.co/spaces/{SPACE_ID}")
# Cell 8
# Dumps the source of every cell executed in this session into app.py and
# uploads app.py, requirements.txt and README.md to a static Space.
# NOTE(review): the dump contains every executed cell verbatim (including this
# one), and the Space is created without private=True. Check the history for
# tokens or other secrets before running.
from huggingface_hub import HfApi, create_repo, notebook_login

# Login
notebook_login()

# Configuration
USERNAME = "kasimali"
SPACE_NAME = "whisper-malayalam-code"
SPACE_ID = f"{USERNAME}/{SPACE_NAME}"

# Get all code from current session
print("Extracting code from Colab session...")
from IPython import get_ipython
ipython = get_ipython()
if ipython is None:
    # get_ipython() returns None outside an IPython/Colab kernel.
    raise RuntimeError("No active IPython session; run this cell inside Colab or Jupyter.")

# Get all executed code: `In` is IPython's list of input cells; blank entries
# are skipped, and each kept cell is labelled with its index in that list.
all_code = [
    f"# Cell {i}\n{cell}\n"
    for i, cell in enumerate(ipython.user_ns.get('In', []))
    if cell and cell.strip()
]

# Combine all code
full_code = "\n\n".join(all_code)

# Save as Python file
print("Creating app.py...")
app_content = (
    "# Whisper Malayalam Fine-tuning Code\n"
    "# Extracted from Colab session\n\n"
    + full_code
)
with open("app.py", "w", encoding="utf-8") as f:
    f.write(app_content)
print(f"Created app.py ({len(full_code)} characters)")

# Create Space
# exist_ok=True already makes an existing Space a no-op, so there is nothing
# to catch here; a bare `except:` would only hide real failures (bad token,
# network) behind an "already exists" message.
print(f"\nCreating Space: {SPACE_ID}...")
create_repo(repo_id=SPACE_ID, repo_type="space", space_sdk="static", exist_ok=True)
print("Space created")

# Upload
api = HfApi()
print("\nUploading app.py...")
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Upload code from Colab",
)

# Create requirements.txt
requirements = "datasets==3.1.0\ntransformers\naccelerate\nevaluate\njiwer\ntorch\n"
with open("requirements.txt", "w", encoding="utf-8") as f:
    f.write(requirements)
api.upload_file(
    path_or_fileobj="requirements.txt",
    path_in_repo="requirements.txt",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add requirements",
)

# Create README: one list entry per line of the file; "" entries are blank
# lines and the final "" gives the trailing newline.
readme_content = "\n".join([
    "---",
    "title: Whisper Malayalam Code",
    "emoji: π€",
    "colorFrom: blue",
    "colorTo: green",
    "sdk: static",
    "---",
    "",
    "# Whisper Malayalam Fine-tuning Code",
    "",
    "Python code for fine-tuning Whisper on Malayalam.",
    "",
    "## Files",
    "- app.py: Main training code",
    "- requirements.txt: Dependencies",
    "",
    "## Usage",
    "pip install -r requirements.txt",
    "python app.py",
    "",
])
with open("README.md", "w", encoding="utf-8") as f:
    f.write(readme_content)
api.upload_file(
    path_or_fileobj="README.md",
    path_in_repo="README.md",
    repo_id=SPACE_ID,
    repo_type="space",
    commit_message="Add README",
)

print("\nSUCCESS!")
print("Uploaded: app.py, requirements.txt, README.md")
print(f"\nView at: https://huggingface.co/spaces/{SPACE_ID}")