github-actions committed on
Commit ·
46b55ef
0
Parent(s):
Clean sync: code and weights only.
Browse files- .gitattributes +3 -0
- .github/workflows/sync_to_hf.yml +50 -0
- .gitignore +10 -0
- README.md +72 -0
- app.py +114 -0
- environment.yml +554 -0
- notebooks/lyricloop.ipynb +0 -0
- requirements.txt +10 -0
- src/lyricloop/__init__.py +4 -0
- src/lyricloop/config.py +36 -0
- src/lyricloop/data.py +136 -0
- src/lyricloop/environment.py +64 -0
- src/lyricloop/metrics.py +105 -0
- src/lyricloop/viz.py +155 -0
.gitattributes
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
.github/workflows/sync_to_hf.yml
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Sync to Hugging Face Hub

on:
  push:
    branches: [main]
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

jobs:
  sync-to-hub:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          lfs: true

      - name: Pull LFS objects
        run: |
          git lfs install
          git lfs pull

      - name: Push to Hub
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          # Strip folders that should stay on GitHub only
          rm -rf assets/
          rm -rf reports/

          # Drop the runner's temporary git history
          rm -rf .git/

          # Start a fresh, single-commit history for the HF remote
          # (GitHub's history is untouched)
          git init
          git branch -M main

          # Throwaway identity for the sync commit
          git config user.name "github-actions"
          git config user.email "actions@github.com"

          # Stage everything and commit
          git lfs install
          git add .
          git commit -m "Clean sync: code and weights only."

          # Overwrite the HF repo with the flat history
          git push --force https://lxtung95:$HF_TOKEN@huggingface.co/lxtung95/lyricloop-llm main
|
.gitignore
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.DS_Store
|
| 2 |
+
.streamlit/
|
| 3 |
+
*.pt
|
| 4 |
+
*.bin
|
| 5 |
+
__pycache__/
|
| 6 |
+
.ipynb_checkpoints/
|
| 7 |
+
data/
|
| 8 |
+
wandb/
|
| 9 |
+
models/
|
| 10 |
+
experiments/
|
README.md
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NAME
|
| 2 |
+
|
| 3 |
+
LyricLoop LLM
|
| 4 |
+
|
| 5 |
+
---
|
| 6 |
+
|
| 7 |
+
PROJECT OBJECTIVE
|
| 8 |
+
|
| 9 |
+
LyricLoop bridges the gap between semantic LLM text and professional musical phrasing. This framework fine-tunes Google's Gemma-2b-it to generate lyrics adhering to specific structures (Verse, Chorus, Bridge) and genre-specific stylings (Electronic, Pop, Rock, Hip-Hop).
|
| 10 |
+
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
LANGUAGE / STACK
|
| 14 |
+
|
| 15 |
+
Python | PyTorch, Hugging Face (Transformers, PEFT, TRL), Streamlit
|
| 16 |
+
|
| 17 |
+
---
|
| 18 |
+
|
| 19 |
+
TECHNICAL METHODOLOGY
|
| 20 |
+
|
| 21 |
+
- Fine-Tuning: Implemented Low-Rank Adaptation (LoRA) to specialize the model in rhythmic patterns while preserving base reasoning.
|
| 22 |
+
- Optimization: Used 4-bit Quantization (QLoRA) via bitsandbytes to reduce the memory footprint during training.
|
| 23 |
+
- Instruction Tuning: Supervised Fine-Tuning (SFT) with custom templates to enforce structural and genre constraints.
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
PROJECT STRUCTURE
|
| 28 |
+
|
| 29 |
+
- app.py: main streamlit application entry point and UI logic.
|
| 30 |
+
- src/lyricloop/: core modular package containing engine logic:
|
| 31 |
+
- config.py: global constants and path management.
|
| 32 |
+
- data.py: prompt engineering and dataset preprocessing.
|
| 33 |
+
- environment.py: hardware-aware setup (MPS/CPU/CUDA).
|
| 34 |
+
- metrics.py: inference execution and perplexity scoring.
|
| 35 |
+
- viz.py: standardized plotting and visual utilities.
|
| 36 |
+
- notebooks/: development playground, training workflows, and EDA.
|
| 37 |
+
- reports/: written technical documentation and project summaries.
|
| 38 |
+
- assets/: visual artifacts and plots used in documentation.
|
| 39 |
+
- requirements.txt: dependency management for environment parity.
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
DATA & SOURCE
|
| 44 |
+
|
| 45 |
+
- Corpus: 5M+ song lyrics (Genius Dataset).
|
| 46 |
+
- Metadata: Artist mapping via Pitchfork Reviews.
|
| 47 |
+
- Stack: Python, Hugging Face (Transformers, PEFT, TRL), PyTorch, and Google Colab (L4 GPU).
|
| 48 |
+
|
| 49 |
+
---
|
| 50 |
+
|
| 51 |
+
EXTERNAL RESOURCES
|
| 52 |
+
|
| 53 |
+
- Full Project Workspace (Google Drive): [Access the Notebooks & Raw Data](https://drive.google.com/drive/folders/1M5SJRaaK8OaskUgEsBupgGVN_-fQS3i4?usp=sharing)
|
| 54 |
+
- Training Environment: Google Colab (L4 GPU)
|
| 55 |
+
|
| 56 |
+
---
|
| 57 |
+
|
| 58 |
+
STUDIO GUIDE
|
| 59 |
+
|
| 60 |
+
- Run on Hugging Face lxtung95/lyricloop
|
| 61 |
+
- App URL: https://lxtung95-lyricloop.hf.space/
|
| 62 |
+
1. Details: Enter a song title and an Artist Aesthetic (e.g., Taylor Swift) to set the tone.
|
| 63 |
+
2. Genre: Select your target genre to adjust rhythmic density.
|
| 64 |
+
3. Compose: Use the Creativity (Temperature) slider to control experimental word choice.
|
| 65 |
+
4. Export: Download the final composition as a .txt file for your creative workflow.
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
SUPPORT
|
| 70 |
+
|
| 71 |
+
Visit my GitHub repository for the latest scripts and downloads:
|
| 72 |
+
https://github.com/lxtung95
|
app.py
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
import torch
import sys
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Make the bundled `src/` package importable regardless of the working
# directory the app is launched from.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(PROJECT_ROOT, 'src'))

# These imports require the sys.path tweak above.
from lyricloop.config import MODEL_ID, RANDOM_STATE
from lyricloop.data import build_inference_prompt, format_lyrics
from lyricloop.metrics import execute_generation
from lyricloop.environment import set_seed

# Page chrome — must run before any other st.* call.
st.set_page_config(page_title="LyricLoop v2.0", page_icon="🎤", layout="wide")
|
| 19 |
+
|
| 20 |
+
# Cached model loading
|
| 21 |
+
@st.cache_resource
def load_studio_engine():
    """Build the Gemma-2b generation engine (base model + LoRA adapters).

    Cached by Streamlit so the heavyweight download/load happens once per
    process. Targets the Hugging Face Spaces free tier: CPU only, float32.

    Returns:
        tuple: ``(model, tokenizer)`` ready for inference.
    """
    set_seed(RANDOM_STATE)

    # Gated-model access token injected via the Space's secret store.
    auth_token = st.secrets["HF_TOKEN"]

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=auth_token)
    # Reuse EOS as the pad token (model ships without one).
    tokenizer.pad_token = tokenizer.eos_token

    # Free tier has no GPU: pin everything to CPU in full precision
    # for numerical stability.
    target_device = "cpu"
    target_dtype = torch.float32

    # Base model skeleton.
    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        dtype=target_dtype,
        device_map=target_device,
        token=auth_token,
    )

    # LoRA adapters live in a separate model repo on the Hub.
    adapter_repo = "lxtung95/lyricloop"
    model = PeftModel.from_pretrained(base_model, adapter_repo, token=auth_token)

    return model, tokenizer
|
| 55 |
+
|
| 56 |
+
# ---- Studio Interface ----
st.title("LyricLoop v2.0")
st.caption("Professional AI Songwriting Framework | Powered by Gemma-2b")
st.markdown("---")

# Sidebar: generation hyperparameters.
st.sidebar.header("Studio Controls")
creativity = st.sidebar.slider("Creativity (Temperature)", 0.5, 1.2, 0.85)
token_limit = st.sidebar.number_input("Max Tokens", 100, 500, 300)

# Two equal columns: inputs on the left, generated lyrics on the right.
col1, col2 = st.columns([1, 1])

with col1:
    st.subheader("Composition Details")
    genre = st.selectbox("Target Genre", ["Pop", "Rock", "Hip-hop", "Electronic", "R&B", "Country"])
    artist = st.text_input("Artist Aesthetic", placeholder="e.g., Taylor Swift")
    title = st.text_input("Song Title", placeholder="Enter your track title...")
    generate_btn = st.button("Compose Lyrics", type="primary", use_container_width=True)

with col2:
    st.subheader("Output")
    # Persistent slot so reruns update one widget instead of stacking new ones.
    output_placeholder = st.empty()

if generate_btn:
    with st.spinner("Model is writing..."):
        # Lazy-load the (cached) engine only when a generation is requested.
        model, tokenizer = load_studio_engine()

        prompt = build_inference_prompt(genre, artist, title)

        raw_output = execute_generation(
            model, tokenizer, prompt,
            max_tokens=token_limit,
            temperature=creativity,
            do_sample=True,
        )

        clean_lyrics = format_lyrics(raw_output)

        # Write into the pre-created placeholder.
        output_placeholder.text_area(
            "Final Draft",
            clean_lyrics,
            height=400,
            key="lyrics_output",  # explicit key helps mobile state persistence
        )

        st.download_button(
            "Export as TXT",
            clean_lyrics,
            file_name=f"{title}_lyrics.txt",
        )
|
environment.yml
ADDED
|
@@ -0,0 +1,554 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: ds
|
| 2 |
+
channels:
|
| 3 |
+
- defaults
|
| 4 |
+
dependencies:
|
| 5 |
+
- _anaconda_depends=2025.06=py313_openblas_2
|
| 6 |
+
- aiobotocore=2.19.0=py313hca03da5_0
|
| 7 |
+
- aiohappyeyeballs=2.4.4=py313hca03da5_0
|
| 8 |
+
- aiohttp=3.11.10=py313h80987f9_0
|
| 9 |
+
- aioitertools=0.7.1=pyhd3eb1b0_0
|
| 10 |
+
- aiosignal=1.2.0=pyhd3eb1b0_0
|
| 11 |
+
- alabaster=0.7.16=py313hca03da5_0
|
| 12 |
+
- alembic=1.17.2=py313hca03da5_0
|
| 13 |
+
- altair=5.5.0=py313hca03da5_0
|
| 14 |
+
- anaconda-catalogs=0.2.0=py313hca03da5_2
|
| 15 |
+
- annotated-types=0.6.0=py313hca03da5_0
|
| 16 |
+
- anyio=4.7.0=py313hca03da5_0
|
| 17 |
+
- appdirs=1.4.4=pyhd3eb1b0_0
|
| 18 |
+
- applaunchservices=0.3.0=py313hca03da5_0
|
| 19 |
+
- appnope=0.1.3=py313hca03da5_0
|
| 20 |
+
- appscript=1.3.0=py313h80987f9_0
|
| 21 |
+
- archspec=0.2.3=pyhd3eb1b0_0
|
| 22 |
+
- argon2-cffi=21.3.0=pyhd3eb1b0_0
|
| 23 |
+
- argon2-cffi-bindings=21.2.0=py313h80987f9_1
|
| 24 |
+
- arrow=1.3.0=py313hca03da5_0
|
| 25 |
+
- arrow-cpp=19.0.0=h0b7d223_2
|
| 26 |
+
- astroid=3.3.8=py313hca03da5_0
|
| 27 |
+
- astropy=7.0.0=py313h80987f9_0
|
| 28 |
+
- astropy-iers-data=0.2025.1.13.0.34.51=py313hca03da5_0
|
| 29 |
+
- asttokens=3.0.0=py313hca03da5_0
|
| 30 |
+
- async-lru=2.0.4=py313hca03da5_0
|
| 31 |
+
- asyncssh=2.17.0=py313hca03da5_0
|
| 32 |
+
- atomicwrites=1.4.0=py_0
|
| 33 |
+
- attrs=24.3.0=py313hca03da5_0
|
| 34 |
+
- automat=24.8.1=py313hca03da5_0
|
| 35 |
+
- autopep8=2.0.4=pyhd3eb1b0_0
|
| 36 |
+
- aws-c-auth=0.6.19=h80987f9_0
|
| 37 |
+
- aws-c-cal=0.5.20=h80987f9_0
|
| 38 |
+
- aws-c-common=0.8.5=h80987f9_0
|
| 39 |
+
- aws-c-compression=0.2.16=h80987f9_0
|
| 40 |
+
- aws-c-event-stream=0.2.15=h313beb8_0
|
| 41 |
+
- aws-c-http=0.6.25=h80987f9_0
|
| 42 |
+
- aws-c-io=0.13.10=h80987f9_0
|
| 43 |
+
- aws-c-mqtt=0.7.13=h80987f9_0
|
| 44 |
+
- aws-c-s3=0.1.51=h80987f9_0
|
| 45 |
+
- aws-c-sdkutils=0.1.6=h80987f9_0
|
| 46 |
+
- aws-checksums=0.1.13=h80987f9_0
|
| 47 |
+
- aws-crt-cpp=0.18.16=h313beb8_0
|
| 48 |
+
- aws-sdk-cpp=1.11.212=hdd7fb2f_0
|
| 49 |
+
- babel=2.16.0=py313hca03da5_0
|
| 50 |
+
- bcrypt=4.3.0=py313h2aea54e_0
|
| 51 |
+
- beautifulsoup4=4.12.3=py313hca03da5_0
|
| 52 |
+
- binaryornot=0.4.4=pyhd3eb1b0_1
|
| 53 |
+
- black=24.10.0=py313hca03da5_0
|
| 54 |
+
- blas=1.0=openblas
|
| 55 |
+
- bleach=6.2.0=py313hca03da5_0
|
| 56 |
+
- blinker=1.9.0=py313hca03da5_0
|
| 57 |
+
- blosc=1.21.3=h313beb8_0
|
| 58 |
+
- bokeh=3.6.2=py313hca03da5_0
|
| 59 |
+
- boltons=24.1.0=py313hca03da5_0
|
| 60 |
+
- boost-cpp=1.82.0=h48ca7d4_2
|
| 61 |
+
- botocore=1.36.3=py313hca03da5_0
|
| 62 |
+
- bottleneck=1.4.2=py313ha35b7ea_0
|
| 63 |
+
- brotli-python=1.0.9=py313h313beb8_9
|
| 64 |
+
- bzip2=1.0.8=h80987f9_6
|
| 65 |
+
- c-ares=1.19.1=h80987f9_0
|
| 66 |
+
- c-blosc2=2.17.1=hca023f9_0
|
| 67 |
+
- ca-certificates=2025.11.4=hca03da5_0
|
| 68 |
+
- cachetools=5.5.1=py313hca03da5_0
|
| 69 |
+
- cairo=1.18.4=h191e429_0
|
| 70 |
+
- cattrs=24.1.2=py313hca03da5_0
|
| 71 |
+
- cctools=949.0.1=hc179dcd_25
|
| 72 |
+
- cctools_osx-arm64=949.0.1=h332cad3_25
|
| 73 |
+
- certifi=2025.11.12=py313hca03da5_0
|
| 74 |
+
- cffi=1.17.1=py313h3eb5a62_1
|
| 75 |
+
- chardet=4.0.0=py313hca03da5_1003
|
| 76 |
+
- charset-normalizer=3.3.2=pyhd3eb1b0_0
|
| 77 |
+
- click=8.1.8=py313hca03da5_0
|
| 78 |
+
- cloudpickle=3.0.0=py313hca03da5_0
|
| 79 |
+
- colorama=0.4.6=py313hca03da5_0
|
| 80 |
+
- colorcet=3.1.0=py313hca03da5_0
|
| 81 |
+
- comm=0.2.1=py313hca03da5_0
|
| 82 |
+
- conda-content-trust=0.2.0=py313hca03da5_1
|
| 83 |
+
- conda-pack=0.7.1=py313hca03da5_0
|
| 84 |
+
- conda-package-handling=2.4.0=py313hca03da5_0
|
| 85 |
+
- conda-package-streaming=0.11.0=py313hca03da5_0
|
| 86 |
+
- conda-repo-cli=1.0.165=py313hca03da5_0
|
| 87 |
+
- constantly=23.10.4=py313hca03da5_0
|
| 88 |
+
- contourpy=1.3.1=py313h48ca7d4_0
|
| 89 |
+
- cookiecutter=1.7.3=pyhd3eb1b0_0
|
| 90 |
+
- cpp-expected=1.1.0=h48ca7d4_0
|
| 91 |
+
- cryptography=44.0.1=py313h8026fc7_0
|
| 92 |
+
- cssselect=1.2.0=py313hca03da5_0
|
| 93 |
+
- curl=8.12.1=h3e2b118_0
|
| 94 |
+
- cycler=0.11.0=pyhd3eb1b0_0
|
| 95 |
+
- cyrus-sasl=2.1.28=h9131b1a_1
|
| 96 |
+
- cytoolz=1.0.1=py313h80987f9_0
|
| 97 |
+
- dask=2025.2.0=py313hca03da5_0
|
| 98 |
+
- dask-core=2025.2.0=py313hca03da5_0
|
| 99 |
+
- dask-expr=2.0.0=py313hca03da5_0
|
| 100 |
+
- datashader=0.18.0=py313hca03da5_0
|
| 101 |
+
- debugpy=1.8.11=py313h313beb8_0
|
| 102 |
+
- decorator=5.1.1=pyhd3eb1b0_0
|
| 103 |
+
- defusedxml=0.7.1=pyhd3eb1b0_0
|
| 104 |
+
- deprecated=1.2.13=py313hca03da5_0
|
| 105 |
+
- diff-match-patch=20200713=pyhd3eb1b0_0
|
| 106 |
+
- dill=0.3.8=py313hca03da5_0
|
| 107 |
+
- distributed=2025.2.0=py313hca03da5_0
|
| 108 |
+
- distro=1.9.0=py313hca03da5_0
|
| 109 |
+
- dmglib=0.9.5=py313h2d4777b_1
|
| 110 |
+
- docstring-to-markdown=0.11=py313hca03da5_0
|
| 111 |
+
- docutils=0.21.2=py313hca03da5_0
|
| 112 |
+
- et_xmlfile=1.1.0=py313hca03da5_1
|
| 113 |
+
- evalidate=2.0.3=py313hca03da5_0
|
| 114 |
+
- executing=0.8.3=pyhd3eb1b0_0
|
| 115 |
+
- expat=2.7.1=h313beb8_0
|
| 116 |
+
- filelock=3.17.0=py313hca03da5_0
|
| 117 |
+
- flake8=7.1.1=py313hca03da5_0
|
| 118 |
+
- flask=3.1.0=py313hca03da5_0
|
| 119 |
+
- fmt=9.1.0=h48ca7d4_1
|
| 120 |
+
- fontconfig=2.15.0=h29935d0_0
|
| 121 |
+
- fonttools=4.55.3=py313h80987f9_0
|
| 122 |
+
- freetype=2.13.3=h47d26ad_0
|
| 123 |
+
- frozendict=2.4.2=py313hca03da5_0
|
| 124 |
+
- frozenlist=1.5.0=py313h80987f9_0
|
| 125 |
+
- fsspec=2025.3.2=py313h7eb115d_0
|
| 126 |
+
- gettext=0.21.0=hbdbcc25_2
|
| 127 |
+
- gflags=2.2.2=h313beb8_1
|
| 128 |
+
- gitdb=4.0.7=pyhd3eb1b0_0
|
| 129 |
+
- gitpython=3.1.43=py313hca03da5_0
|
| 130 |
+
- glib=2.84.2=hc880cf1_0
|
| 131 |
+
- glib-tools=2.84.2=hc880cf1_0
|
| 132 |
+
- glog=0.5.0=h313beb8_1
|
| 133 |
+
- gmp=6.3.0=h313beb8_0
|
| 134 |
+
- gmpy2=2.2.1=py313h5c1b81f_0
|
| 135 |
+
- graphite2=1.3.14=hc377ac9_1
|
| 136 |
+
- greenlet=3.1.1=py313h313beb8_0
|
| 137 |
+
- gst-plugins-base=1.14.1=h313beb8_1
|
| 138 |
+
- gstreamer=1.14.1=h80987f9_1
|
| 139 |
+
- h11=0.16.0=py313hca03da5_0
|
| 140 |
+
- h5py=3.12.1=py313h0957e0b_1
|
| 141 |
+
- harfbuzz=10.2.0=he637ebf_1
|
| 142 |
+
- hdf5=1.14.5=hd77251f_2
|
| 143 |
+
- heapdict=1.0.1=pyhd3eb1b0_0
|
| 144 |
+
- holoviews=1.20.2=py313hca03da5_0
|
| 145 |
+
- httpcore=1.0.9=py313hca03da5_0
|
| 146 |
+
- httpx=0.28.1=py313hca03da5_0
|
| 147 |
+
- hvplot=0.11.3=py313hca03da5_0
|
| 148 |
+
- hyperlink=21.0.0=pyhd3eb1b0_0
|
| 149 |
+
- icu=73.1=h313beb8_0
|
| 150 |
+
- idna=3.7=py313hca03da5_0
|
| 151 |
+
- imageio=2.37.0=py313hca03da5_0
|
| 152 |
+
- imagesize=1.4.1=py313hca03da5_0
|
| 153 |
+
- imbalanced-learn=0.13.0=py313hca03da5_0
|
| 154 |
+
- importlib-metadata=8.5.0=py313hca03da5_0
|
| 155 |
+
- incremental=24.7.2=pyhd3eb1b0_0
|
| 156 |
+
- inflection=0.5.1=py313hca03da5_1
|
| 157 |
+
- iniconfig=1.1.1=pyhd3eb1b0_0
|
| 158 |
+
- intake=2.0.7=py313hca03da5_0
|
| 159 |
+
- intervaltree=3.1.0=pyhd3eb1b0_0
|
| 160 |
+
- ipykernel=6.29.5=py313hca03da5_1
|
| 161 |
+
- ipython=8.30.0=py313hca03da5_0
|
| 162 |
+
- ipython_pygments_lexers=1.1.1=py313hca03da5_0
|
| 163 |
+
- ipywidgets=8.1.5=py313hca03da5_0
|
| 164 |
+
- isort=6.0.1=py313hca03da5_0
|
| 165 |
+
- itemadapter=0.3.0=pyhd3eb1b0_0
|
| 166 |
+
- itemloaders=1.3.2=py313hca03da5_0
|
| 167 |
+
- itsdangerous=2.2.0=py313hca03da5_0
|
| 168 |
+
- jaraco.classes=3.2.1=pyhd3eb1b0_0
|
| 169 |
+
- jaraco.context=6.0.0=py313hca03da5_0
|
| 170 |
+
- jaraco.functools=4.1.0=py313hca03da5_0
|
| 171 |
+
- jedi=0.19.2=py313hca03da5_0
|
| 172 |
+
- jellyfish=1.1.3=py313h1bd1ac0_0
|
| 173 |
+
- jinja2=3.1.6=py313hca03da5_0
|
| 174 |
+
- jinja2-time=0.2.0=pyhd3eb1b0_3
|
| 175 |
+
- jmespath=1.0.1=py313hca03da5_0
|
| 176 |
+
- joblib=1.4.2=py313hca03da5_0
|
| 177 |
+
- jpeg=9e=h80987f9_3
|
| 178 |
+
- jq=1.7.1=h80987f9_0
|
| 179 |
+
- json5=0.9.25=py313hca03da5_0
|
| 180 |
+
- jsonpatch=1.33=py313hca03da5_1
|
| 181 |
+
- jsonpointer=2.1=pyhd3eb1b0_0
|
| 182 |
+
- jsonschema=4.23.0=py313hca03da5_0
|
| 183 |
+
- jsonschema-specifications=2023.7.1=py313hca03da5_0
|
| 184 |
+
- jupyter=1.1.1=py313hca03da5_0
|
| 185 |
+
- jupyter-lsp=2.2.5=py313hca03da5_0
|
| 186 |
+
- jupyter_client=8.6.3=py313hca03da5_0
|
| 187 |
+
- jupyter_console=6.6.3=py313hca03da5_1
|
| 188 |
+
- jupyter_core=5.7.2=py313hca03da5_0
|
| 189 |
+
- jupyter_events=0.12.0=py313hca03da5_0
|
| 190 |
+
- jupyter_server=2.15.0=py313hca03da5_0
|
| 191 |
+
- jupyter_server_terminals=0.5.3=py313hca03da5_0
|
| 192 |
+
- jupyterlab=4.3.4=py313hca03da5_0
|
| 193 |
+
- jupyterlab-variableinspector=3.2.4=py313hca03da5_0
|
| 194 |
+
- jupyterlab_pygments=0.3.0=py313hca03da5_0
|
| 195 |
+
- jupyterlab_server=2.27.3=py313hca03da5_0
|
| 196 |
+
- jupyterlab_widgets=3.0.13=py313hca03da5_0
|
| 197 |
+
- keyring=25.6.0=py313hca03da5_0
|
| 198 |
+
- kiwisolver=1.4.8=py313h313beb8_0
|
| 199 |
+
- krb5=1.20.1=hf3e1bf2_1
|
| 200 |
+
- lazy_loader=0.4=py313hca03da5_0
|
| 201 |
+
- lcms2=2.16=he26ebf3_1
|
| 202 |
+
- ld64=530=hb29bf3f_25
|
| 203 |
+
- ld64_osx-arm64=530=h001ce53_25
|
| 204 |
+
- ldid=2.1.5=h20b2a84_3
|
| 205 |
+
- lerc=4.0.0=h313beb8_0
|
| 206 |
+
- libabseil=20250127.0=cxx17_h313beb8_0
|
| 207 |
+
- libarchive=3.7.7=h8f13d7a_0
|
| 208 |
+
- libboost=1.82.0=h0bc93f9_2
|
| 209 |
+
- libbrotlicommon=1.0.9=h80987f9_9
|
| 210 |
+
- libbrotlidec=1.0.9=h80987f9_9
|
| 211 |
+
- libbrotlienc=1.0.9=h80987f9_9
|
| 212 |
+
- libclang=20.1.8=default_h988c893_0
|
| 213 |
+
- libclang13=20.1.8=default_h9231c17_0
|
| 214 |
+
- libcurl=8.12.1=hde089ae_0
|
| 215 |
+
- libcxx=20.1.8=hd7fd590_1
|
| 216 |
+
- libdeflate=1.22=h80987f9_0
|
| 217 |
+
- libedit=3.1.20230828=h80987f9_0
|
| 218 |
+
- libev=4.33=h1a28f6b_1
|
| 219 |
+
- libevent=2.1.12=h02f6b3c_1
|
| 220 |
+
- libffi=3.4.4=hca03da5_1
|
| 221 |
+
- libgfortran=5.0.0=11_3_0_hca03da5_28
|
| 222 |
+
- libgfortran5=11.3.0=h009349e_28
|
| 223 |
+
- libglib=2.84.2=hdc2269c_0
|
| 224 |
+
- libgrpc=1.71.0=h62f6fdd_0
|
| 225 |
+
- libiconv=1.16=h80987f9_3
|
| 226 |
+
- liblief=0.16.4=h313beb8_0
|
| 227 |
+
- libllvm14=14.0.6=h19fdd8a_4
|
| 228 |
+
- libllvm20=20.1.8=h1701f07_0
|
| 229 |
+
- libmamba=2.0.5=h15e39b3_1
|
| 230 |
+
- libmambapy=2.0.5=py313h48ca7d4_1
|
| 231 |
+
- libmpdec=4.0.0=h80987f9_0
|
| 232 |
+
- libnghttp2=1.57.0=h62f6fdd_0
|
| 233 |
+
- libopenblas=0.3.30=hf2bb037_2
|
| 234 |
+
- libopus=1.3.1=h80987f9_1
|
| 235 |
+
- libpng=1.6.39=h80987f9_0
|
| 236 |
+
- libpq=17.4=h02f6b3c_0
|
| 237 |
+
- libprotobuf=5.29.3=h9f9f828_0
|
| 238 |
+
- libre2-11=2024.07.02=h313beb8_0
|
| 239 |
+
- libsodium=1.0.18=h1a28f6b_0
|
| 240 |
+
- libsolv=0.7.30=h514c7bf_1
|
| 241 |
+
- libspatialindex=1.9.3=hc377ac9_0
|
| 242 |
+
- libssh2=1.11.1=h3e2b118_0
|
| 243 |
+
- libthrift=0.15.0=h73c2103_2
|
| 244 |
+
- libtiff=4.7.0=h91aec0a_0
|
| 245 |
+
- libvpx=1.13.1=h313beb8_0
|
| 246 |
+
- libwebp-base=1.3.2=h80987f9_1
|
| 247 |
+
- libxml2=2.13.8=h0b34f26_0
|
| 248 |
+
- libxslt=1.1.41=hf4d3faa_0
|
| 249 |
+
- linkify-it-py=2.0.0=py313hca03da5_0
|
| 250 |
+
- llvm-openmp=14.0.6=hc6e5704_0
|
| 251 |
+
- llvmlite=0.44.0=py313heb35c27_1
|
| 252 |
+
- locket=1.0.0=py313hca03da5_0
|
| 253 |
+
- lsprotocol=2025.0.0=py313hca03da5_0
|
| 254 |
+
- lxml=5.3.0=py313h1d4350b_1
|
| 255 |
+
- lz4=4.3.2=py313h80987f9_1
|
| 256 |
+
- lz4-c=1.9.4=h313beb8_1
|
| 257 |
+
- lzo=2.10=h1a28f6b_2
|
| 258 |
+
- mako=1.3.10=py313hca03da5_0
|
| 259 |
+
- markdown=3.8=py313hca03da5_0
|
| 260 |
+
- markdown-it-py=2.2.0=py313hca03da5_1
|
| 261 |
+
- markupsafe=3.0.2=py313h80987f9_0
|
| 262 |
+
- matplotlib=3.10.0=py313hca03da5_1
|
| 263 |
+
- matplotlib-base=3.10.0=py313hb68df00_0
|
| 264 |
+
- matplotlib-inline=0.1.6=py313hca03da5_0
|
| 265 |
+
- mbedtls=3.5.1=h313beb8_1
|
| 266 |
+
- mccabe=0.7.0=pyhd3eb1b0_0
|
| 267 |
+
- mdit-py-plugins=0.3.0=py313hca03da5_0
|
| 268 |
+
- mdurl=0.1.0=py313hca03da5_0
|
| 269 |
+
- menuinst=2.2.0=py313hca03da5_1
|
| 270 |
+
- mistune=3.1.2=py313hca03da5_0
|
| 271 |
+
- more-itertools=10.3.0=py313hca03da5_0
|
| 272 |
+
- mpc=1.3.1=h80987f9_0
|
| 273 |
+
- mpfr=4.2.1=h80987f9_0
|
| 274 |
+
- mpmath=1.3.0=py313hca03da5_0
|
| 275 |
+
- msgpack-python=1.0.3=py313h48ca7d4_0
|
| 276 |
+
- multidict=6.1.0=py313h80987f9_0
|
| 277 |
+
- multipledispatch=0.6.0=py313hca03da5_0
|
| 278 |
+
- mypy=1.14.1=py313h80987f9_0
|
| 279 |
+
- mypy_extensions=1.0.0=py313hca03da5_0
|
| 280 |
+
- mysql=8.4.0=h065ec36_2
|
| 281 |
+
- mysql-common=9.3.0=h0968ce5_3
|
| 282 |
+
- mysql-libs=9.3.0=ha948bd4_3
|
| 283 |
+
- narwhals=1.31.0=py313hca03da5_1
|
| 284 |
+
- nb_conda_kernels=2.5.2=py313hca03da5_2
|
| 285 |
+
- nbclient=0.10.2=py313hca03da5_0
|
| 286 |
+
- nbconvert=7.16.6=py313hca03da5_0
|
| 287 |
+
- nbconvert-core=7.16.6=py313hca03da5_0
|
| 288 |
+
- nbconvert-pandoc=7.16.6=py313hca03da5_0
|
| 289 |
+
- nbformat=5.10.4=py313hca03da5_0
|
| 290 |
+
- ncurses=6.4=h313beb8_0
|
| 291 |
+
- nest-asyncio=1.6.0=py313hca03da5_0
|
| 292 |
+
- networkx=3.4.2=py313hca03da5_0
|
| 293 |
+
- nlohmann_json=3.11.2=h313beb8_0
|
| 294 |
+
- nltk=3.9.1=py313h0ef513f_0
|
| 295 |
+
- notebook=7.3.2=py313hca03da5_1
|
| 296 |
+
- notebook-shim=0.2.4=py313hca03da5_0
|
| 297 |
+
- numba=0.61.0=py313h313beb8_1
|
| 298 |
+
- numexpr=2.10.1=py313h5d9532f_0
|
| 299 |
+
- numpy=2.1.3=py313hf2f81dc_3
|
| 300 |
+
- numpy-base=2.1.3=py313h2a02f3f_3
|
| 301 |
+
- numpydoc=1.2=pyhd3eb1b0_0
|
| 302 |
+
- oniguruma=6.9.7.1=h1a28f6b_0
|
| 303 |
+
- openjpeg=2.5.2=hba36e21_1
|
| 304 |
+
- openldap=2.6.4=he7ef289_0
|
| 305 |
+
- openpyxl=3.1.5=py313h80987f9_1
|
| 306 |
+
- openssl=3.0.18=h9b4081a_0
|
| 307 |
+
- orc=2.1.1=h55d209b_0
|
| 308 |
+
- overrides=7.4.0=py313hca03da5_0
|
| 309 |
+
- packaging=24.2=py313hca03da5_0
|
| 310 |
+
- pandas=2.2.3=py313hcf29cfe_0
|
| 311 |
+
- pandoc=2.12=hca03da5_3
|
| 312 |
+
- pandocfilters=1.5.0=pyhd3eb1b0_0
|
| 313 |
+
- panel=1.7.0=py313hca03da5_0
|
| 314 |
+
- param=2.2.0=py313hca03da5_0
|
| 315 |
+
- parsel=1.8.1=py313hca03da5_0
|
| 316 |
+
- parso=0.8.4=py313hca03da5_0
|
| 317 |
+
- partd=1.4.2=py313hca03da5_0
|
| 318 |
+
- patch=2.7.6=h1a28f6b_1001
|
| 319 |
+
- pathspec=0.10.3=py313hca03da5_0
|
| 320 |
+
- patsy=1.0.1=py313hca03da5_0
|
| 321 |
+
- pcre2=10.42=hb066dcc_1
|
| 322 |
+
- pexpect=4.8.0=pyhd3eb1b0_3
|
| 323 |
+
- pickleshare=0.7.5=pyhd3eb1b0_1003
|
| 324 |
+
- pillow=11.1.0=py313h41ba818_1
|
| 325 |
+
- pip=25.1=pyhc872135_2
|
| 326 |
+
- pixman=0.46.4=h09dc60e_0
|
| 327 |
+
- pkce=1.0.3=py313hca03da5_0
|
| 328 |
+
- pkginfo=1.12.0=py313hca03da5_0
|
| 329 |
+
- platformdirs=4.3.7=py313hca03da5_0
|
| 330 |
+
- plotly=5.24.1=py313h7eb115d_1
|
| 331 |
+
- pluggy=1.5.0=py313hca03da5_0
|
| 332 |
+
- ply=3.11=py313hca03da5_1
|
| 333 |
+
- poyo=0.5.0=pyhd3eb1b0_0
|
| 334 |
+
- prometheus_client=0.21.1=py313hca03da5_0
|
| 335 |
+
- prompt-toolkit=3.0.43=py313hca03da5_0
|
| 336 |
+
- prompt_toolkit=3.0.43=hd3eb1b0_0
|
| 337 |
+
- propcache=0.3.1=py313h80987f9_0
|
| 338 |
+
- protego=0.4.0=py313hca03da5_0
|
| 339 |
+
- protobuf=5.29.3=py313h514c7bf_0
|
| 340 |
+
- psutil=5.9.0=py313h80987f9_1
|
| 341 |
+
- ptyprocess=0.7.0=pyhd3eb1b0_2
|
| 342 |
+
- pure_eval=0.2.2=pyhd3eb1b0_0
|
| 343 |
+
- py-cpuinfo=9.0.0=py313hca03da5_0
|
| 344 |
+
- py-lief=0.16.4=py313h313beb8_0
|
| 345 |
+
- pyarrow=19.0.0=py313h313beb8_1
|
| 346 |
+
- pyasn1=0.4.8=pyhd3eb1b0_0
|
| 347 |
+
- pyasn1-modules=0.2.8=py_0
|
| 348 |
+
- pybind11-abi=5=hd3eb1b0_0
|
| 349 |
+
- pycodestyle=2.12.1=py313hca03da5_0
|
| 350 |
+
- pycosat=0.6.6=py313h80987f9_2
|
| 351 |
+
- pycparser=2.21=pyhd3eb1b0_0
|
| 352 |
+
- pyct=0.5.0=py313hca03da5_0
|
| 353 |
+
- pycurl=7.45.6=py313h10e1ce2_0
|
| 354 |
+
- pydantic=2.10.3=py313hca03da5_0
|
| 355 |
+
- pydantic-core=2.27.1=py313h2aea54e_0
|
| 356 |
+
- pydantic-settings=2.6.1=py313hca03da5_0
|
| 357 |
+
- pydispatcher=2.0.5=py313hca03da5_3
|
| 358 |
+
- pydocstyle=6.3.0=py313hca03da5_0
|
| 359 |
+
- pyerfa=2.0.1.5=py313h80987f9_0
|
| 360 |
+
- pyflakes=3.2.0=py313hca03da5_0
|
| 361 |
+
- pygithub=2.4.0=py313hca03da5_0
|
| 362 |
+
- pygments=2.19.1=py313hca03da5_0
|
| 363 |
+
- pyjwt=2.10.1=py313hca03da5_0
|
| 364 |
+
- pylint=3.3.5=py313hca03da5_0
|
| 365 |
+
- pylint-venv=3.0.3=py313hca03da5_0
|
| 366 |
+
- pyls-spyder=0.4.0=pyhd3eb1b0_0
|
| 367 |
+
- pynacl=1.5.0=py313h80987f9_1
|
| 368 |
+
- pyobjc-core=10.1=py313h80987f9_0
|
| 369 |
+
- pyobjc-framework-cocoa=10.1=py313hb094c41_0
|
| 370 |
+
- pyobjc-framework-coreservices=10.1=py313hdd8dd1f_0
|
| 371 |
+
- pyobjc-framework-fsevents=10.1=py313hca03da5_0
|
| 372 |
+
- pyodbc=5.2.0=py313h313beb8_0
|
| 373 |
+
- pyopenssl=25.0.0=py313h9e2d7d8_0
|
| 374 |
+
- pyparsing=3.2.0=py313hca03da5_0
|
| 375 |
+
- pyqt=6.9.1=py313h9be6068_0
|
| 376 |
+
- pyqt5-sip=12.13.0=py313h80987f9_1
|
| 377 |
+
- pyqt6-sip=13.10.2=py313h45c6bc8_0
|
| 378 |
+
- pyqtwebengine=6.9.0=py313h8d0667a_0
|
| 379 |
+
- pyside6=6.9.2=py313h7961fb0_0
|
| 380 |
+
- pysocks=1.7.1=py313hca03da5_0
|
| 381 |
+
- pytables=3.10.2=py313h8397fff_2
|
| 382 |
+
- pytest=8.3.4=py313hca03da5_0
|
| 383 |
+
- python=3.13.5=h2eb94d5_100_cp313
|
| 384 |
+
- python-dateutil=2.9.0post0=py313hca03da5_2
|
| 385 |
+
- python-dotenv=1.1.0=py313hca03da5_0
|
| 386 |
+
- python-fastjsonschema=2.20.0=py313hca03da5_0
|
| 387 |
+
- python-json-logger=3.2.1=py313hca03da5_0
|
| 388 |
+
- python-libarchive-c=5.1=pyhd3eb1b0_0
|
| 389 |
+
- python-lmdb=1.6.2=py313h313beb8_0
|
| 390 |
+
- python-lsp-black=2.0.0=py313hca03da5_1
|
| 391 |
+
- python-lsp-jsonrpc=1.1.2=pyhd3eb1b0_0
|
| 392 |
+
- python-lsp-ruff=2.3.0=py313hca03da5_0
|
| 393 |
+
- python-lsp-server=1.13.1=py313h7eb115d_0
|
| 394 |
+
- python-slugify=5.0.2=pyhd3eb1b0_0
|
| 395 |
+
- python-tzdata=2025.2=pyhd3eb1b0_0
|
| 396 |
+
- python.app=3=py313h80987f9_2
|
| 397 |
+
- python_abi=3.13=0_cp313
|
| 398 |
+
- pytoolconfig=1.2.6=py313hca03da5_0
|
| 399 |
+
- pytz=2024.1=py313hca03da5_0
|
| 400 |
+
- pyuca=1.2=py313hca03da5_1
|
| 401 |
+
- pyviz_comms=3.0.2=py313hca03da5_0
|
| 402 |
+
- pywavelets=1.8.0=py313h80987f9_0
|
| 403 |
+
- pyyaml=6.0.2=py313h80987f9_0
|
| 404 |
+
- pyzmq=26.2.0=py313h313beb8_0
|
| 405 |
+
- qdarkstyle=3.2.3=pyhd3eb1b0_0
|
| 406 |
+
- qstylizer=0.2.2=py313hca03da5_0
|
| 407 |
+
- qt-main=6.9.2=h10e828f_0
|
| 408 |
+
- qt5compat=6.9.2=h6ff497d_1
|
| 409 |
+
- qtawesome=1.4.0=py313hca03da5_0
|
| 410 |
+
- qtbase=6.9.2=h32c7431_0
|
| 411 |
+
- qtbase-devel=6.9.2=h2b69d39_0
|
| 412 |
+
- qtconsole=5.7.0=py313hca03da5_0
|
| 413 |
+
- qtdeclarative=6.9.2=hfc17e28_1
|
| 414 |
+
- qtimageformats=6.9.2=h850909d_1
|
| 415 |
+
- qtpy=2.4.1=py313hca03da5_0
|
| 416 |
+
- qtshadertools=6.9.2=hfc17e28_1
|
| 417 |
+
- qtsvg=6.9.2=h310a915_1
|
| 418 |
+
- qttools=6.9.2=hd987465_0
|
| 419 |
+
- qttranslations=6.9.2=hfc17e28_1
|
| 420 |
+
- qtwebchannel=6.9.2=hfc17e28_1
|
| 421 |
+
- qtwebengine=6.9.2=h79e3840_0
|
| 422 |
+
- qtwebsockets=6.9.2=hfc17e28_1
|
| 423 |
+
- queuelib=1.6.2=py313hca03da5_0
|
| 424 |
+
- re2=2024.07.02=h48ca7d4_0
|
| 425 |
+
- readchar=4.0.5=py313hca03da5_0
|
| 426 |
+
- readline=8.2=h1a28f6b_0
|
| 427 |
+
- referencing=0.30.2=py313hca03da5_0
|
| 428 |
+
- regex=2024.11.6=py313h80987f9_0
|
| 429 |
+
- reproc=14.2.4=h313beb8_2
|
| 430 |
+
- reproc-cpp=14.2.4=h313beb8_2
|
| 431 |
+
- requests=2.32.3=py313hca03da5_1
|
| 432 |
+
- requests-file=2.1.0=py313hca03da5_0
|
| 433 |
+
- requests-toolbelt=1.0.0=py313hca03da5_0
|
| 434 |
+
- rfc3339-validator=0.1.4=py313hca03da5_0
|
| 435 |
+
- rfc3986-validator=0.1.1=py313hca03da5_0
|
| 436 |
+
- rich=13.9.4=py313hca03da5_0
|
| 437 |
+
- roman-numerals-py=3.1.0=py313hca03da5_0
|
| 438 |
+
- rope=1.13.0=py313hca03da5_0
|
| 439 |
+
- rpds-py=0.22.3=py313h2aea54e_0
|
| 440 |
+
- rtree=1.0.1=py313hca03da5_0
|
| 441 |
+
- ruamel.yaml=0.18.10=py313h80987f9_0
|
| 442 |
+
- ruamel.yaml.clib=0.2.12=py313h80987f9_0
|
| 443 |
+
- ruamel_yaml=0.17.21=py313h80987f9_0
|
| 444 |
+
- ruff=0.12.0=py313h59dbcda_0
|
| 445 |
+
- s3fs=2025.3.2=py313hca03da5_0
|
| 446 |
+
- scikit-image=0.25.0=py313h313beb8_0
|
| 447 |
+
- scikit-learn=1.6.1=py313h313beb8_0
|
| 448 |
+
- scipy=1.15.3=py313hd7edaaf_0
|
| 449 |
+
- scrapy=2.12.0=py313hca03da5_1
|
| 450 |
+
- seaborn=0.13.2=py313hca03da5_3
|
| 451 |
+
- semver=3.0.2=py313hca03da5_1
|
| 452 |
+
- send2trash=1.8.2=py313hca03da5_1
|
| 453 |
+
- sentry-sdk=2.45.0=py313hca03da5_0
|
| 454 |
+
- service_identity=24.2.0=py313hca03da5_0
|
| 455 |
+
- setuptools=80.9.0=py313hca03da5_0
|
| 456 |
+
- shellingham=1.5.0=py313hca03da5_0
|
| 457 |
+
- simdjson=3.10.1=h48ca7d4_0
|
| 458 |
+
- sip=6.12.0=py313h8740e61_0
|
| 459 |
+
- six=1.17.0=py313hca03da5_0
|
| 460 |
+
- sklearn-compat=0.1.3=py313hca03da5_0
|
| 461 |
+
- smmap=4.0.0=pyhd3eb1b0_0
|
| 462 |
+
- snappy=1.2.1=h313beb8_0
|
| 463 |
+
- sniffio=1.3.0=py313hca03da5_0
|
| 464 |
+
- snowballstemmer=2.2.0=pyhd3eb1b0_0
|
| 465 |
+
- sortedcontainers=2.4.0=pyhd3eb1b0_0
|
| 466 |
+
- soupsieve=2.5=py313hca03da5_0
|
| 467 |
+
- spdlog=1.11.0=h48ca7d4_0
|
| 468 |
+
- sphinx=8.2.3=py313h80987f9_0
|
| 469 |
+
- sphinxcontrib-applehelp=2.0.0=pyhd3eb1b0_1
|
| 470 |
+
- sphinxcontrib-devhelp=2.0.0=pyhd3eb1b0_0
|
| 471 |
+
- sphinxcontrib-htmlhelp=2.1.0=pyhd3eb1b0_0
|
| 472 |
+
- sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0
|
| 473 |
+
- sphinxcontrib-qthelp=2.0.0=pyhd3eb1b0_1
|
| 474 |
+
- sphinxcontrib-serializinghtml=2.0.0=pyhd3eb1b0_0
|
| 475 |
+
- spyder=6.1.0=py313h70421b1_1
|
| 476 |
+
- spyder-kernels=3.1.1=py313h7eb115d_0
|
| 477 |
+
- sqlalchemy=2.0.39=py313hbe2cdee_0
|
| 478 |
+
- sqlite=3.50.2=h79febb2_1
|
| 479 |
+
- stack_data=0.2.0=pyhd3eb1b0_0
|
| 480 |
+
- statsmodels=0.14.4=py313h80987f9_0
|
| 481 |
+
- streamlit=1.45.1=py313hca03da5_1
|
| 482 |
+
- superqt=0.7.6=py313hffb95fb_0
|
| 483 |
+
- sympy=1.13.3=py313hca03da5_1
|
| 484 |
+
- tabulate=0.9.0=py313hca03da5_0
|
| 485 |
+
- tapi=1100.0.11=h8754e6a_1
|
| 486 |
+
- tbb=2021.8.0=h48ca7d4_0
|
| 487 |
+
- tblib=3.1.0=py313hca03da5_0
|
| 488 |
+
- tenacity=9.0.0=py313hca03da5_0
|
| 489 |
+
- terminado=0.17.1=py313hca03da5_0
|
| 490 |
+
- text-unidecode=1.3=pyhd3eb1b0_0
|
| 491 |
+
- textdistance=4.2.1=pyhd3eb1b0_0
|
| 492 |
+
- threadpoolctl=3.5.0=py313h7eb115d_0
|
| 493 |
+
- three-merge=0.1.1=pyhd3eb1b0_0
|
| 494 |
+
- tifffile=2025.2.18=py313hca03da5_0
|
| 495 |
+
- tinycss2=1.4.0=py313hca03da5_0
|
| 496 |
+
- tk=8.6.14=h6ba3021_1
|
| 497 |
+
- tldextract=5.1.2=py313hca03da5_0
|
| 498 |
+
- toml=0.10.2=pyhd3eb1b0_0
|
| 499 |
+
- tomli=2.0.1=py313hca03da5_1
|
| 500 |
+
- tomlkit=0.13.2=py313hca03da5_0
|
| 501 |
+
- toolz=1.0.0=py313hca03da5_0
|
| 502 |
+
- tornado=6.5.1=py313h80987f9_0
|
| 503 |
+
- tqdm=4.67.1=py313h7eb115d_0
|
| 504 |
+
- traitlets=5.14.3=py313hca03da5_0
|
| 505 |
+
- truststore=0.10.0=py313hca03da5_0
|
| 506 |
+
- twisted=24.11.0=py313hca03da5_0
|
| 507 |
+
- typer=0.9.0=py313hca03da5_0
|
| 508 |
+
- typing-extensions=4.12.2=py313hca03da5_0
|
| 509 |
+
- typing_extensions=4.12.2=py313hca03da5_0
|
| 510 |
+
- tzdata=2025b=h04d1e81_0
|
| 511 |
+
- uc-micro-py=1.0.1=py313hca03da5_0
|
| 512 |
+
- ujson=5.10.0=py313h313beb8_1
|
| 513 |
+
- unidecode=1.3.8=py313hca03da5_0
|
| 514 |
+
- unixodbc=2.3.11=h1a28f6b_0
|
| 515 |
+
- urllib3=2.3.0=py313hca03da5_0
|
| 516 |
+
- utf8proc=2.6.1=h80987f9_1
|
| 517 |
+
- w3lib=2.1.2=py313hca03da5_0
|
| 518 |
+
- watchdog=4.0.2=py313h80987f9_0
|
| 519 |
+
- wcwidth=0.2.5=pyhd3eb1b0_0
|
| 520 |
+
- webencodings=0.5.1=py313hca03da5_2
|
| 521 |
+
- websocket-client=1.8.0=py313hca03da5_0
|
| 522 |
+
- werkzeug=3.1.3=py313hca03da5_0
|
| 523 |
+
- whatthepatch=1.0.2=py313hca03da5_0
|
| 524 |
+
- wheel=0.45.1=py313hca03da5_0
|
| 525 |
+
- widgetsnbextension=4.0.13=py313hca03da5_0
|
| 526 |
+
- wrapt=1.17.0=py313h80987f9_0
|
| 527 |
+
- wurlitzer=3.0.2=py313hca03da5_0
|
| 528 |
+
- xarray=2025.4.0=py313hca03da5_0
|
| 529 |
+
- xlwings=0.32.1=py313hca03da5_1
|
| 530 |
+
- xyzservices=2022.9.0=py313hca03da5_1
|
| 531 |
+
- xz=5.6.4=h80987f9_1
|
| 532 |
+
- yaml=0.2.5=h1a28f6b_0
|
| 533 |
+
- yaml-cpp=0.8.0=h313beb8_1
|
| 534 |
+
- yapf=0.40.2=py313hca03da5_0
|
| 535 |
+
- yarl=1.18.0=py313h80987f9_0
|
| 536 |
+
- zeromq=4.3.5=h313beb8_0
|
| 537 |
+
- zict=3.0.0=py313hca03da5_0
|
| 538 |
+
- zipp=3.21.0=py313hca03da5_0
|
| 539 |
+
- zlib=1.2.13=h18a0788_1
|
| 540 |
+
- zlib-ng=2.0.7=h80987f9_0
|
| 541 |
+
- zope=1.0=py313hca03da5_1
|
| 542 |
+
- zope.interface=7.1.1=py313h80987f9_0
|
| 543 |
+
- zstandard=0.23.0=py313h1a4646a_1
|
| 544 |
+
- zstd=1.5.6=hfb09047_0
|
| 545 |
+
- pip:
|
| 546 |
+
- accelerate==1.12.0
|
| 547 |
+
- hf-xet==1.2.0
|
| 548 |
+
- huggingface-hub==0.36.0
|
| 549 |
+
- peft==0.18.1
|
| 550 |
+
- safetensors==0.7.0
|
| 551 |
+
- tokenizers==0.22.2
|
| 552 |
+
- torch==2.9.1
|
| 553 |
+
- transformers==4.57.6
|
| 554 |
+
- typer-slim==0.21.1
|
notebooks/lyricloop.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
torch
|
| 3 |
+
transformers
|
| 4 |
+
peft
|
| 5 |
+
bitsandbytes
|
| 6 |
+
accelerate
|
| 7 |
+
pandas
|
| 8 |
+
matplotlib
|
| 9 |
+
seaborn
|
| 10 |
+
scikit-learn
|
src/lyricloop/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LyricLoop: A modular framework for fine-tuning and evaluating
|
| 3 |
+
LLMs on musical lyric generation and critique.
|
| 4 |
+
"""
|
src/lyricloop/config.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
+
# -------------------------
|
| 4 |
+
# Model Configuration
|
| 5 |
+
# -------------------------
|
| 6 |
+
MODEL_ID = "google/gemma-2b-it"
|
| 7 |
+
RANDOM_STATE = 42
|
| 8 |
+
|
| 9 |
+
# -------------------------
|
| 10 |
+
# Path Management
|
| 11 |
+
# -------------------------
|
| 12 |
+
# Assumes the script is in lyricloop-llm/src/lyricloop/
|
| 13 |
+
# Go up 2 levels to reach the lyricloop-llm root
|
| 14 |
+
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../"))
|
| 15 |
+
|
| 16 |
+
# Define standard subfolders
|
| 17 |
+
ASSETS_DIR = os.path.join(PROJECT_ROOT, "assets")
|
| 18 |
+
DATA_DIR = os.path.join(PROJECT_ROOT, "data")
|
| 19 |
+
MODELS_DIR = os.path.join(PROJECT_ROOT, "models")
|
| 20 |
+
|
| 21 |
+
def ensure_dirs():
|
| 22 |
+
"""Initializes the project folder structure if it does not exist."""
|
| 23 |
+
os.makedirs(ASSETS_DIR, exist_ok=True)
|
| 24 |
+
os.makedirs(DATA_DIR, exist_ok=True)
|
| 25 |
+
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 26 |
+
|
| 27 |
+
# -------------------------
|
| 28 |
+
# Global History Template
|
| 29 |
+
# -------------------------
|
| 30 |
+
def initialize_history():
|
| 31 |
+
"""Returns a fresh instance of the experiment history log."""
|
| 32 |
+
return {
|
| 33 |
+
"baseline": {"scores": [], "avg_confidence": [], "samples": {}, "metrics": {}},
|
| 34 |
+
"1.0": {"scores": [], "avg_confidence": [], "samples": {}, "metrics": {}},
|
| 35 |
+
"2.0": {"scores": [], "avg_confidence": [], "samples": {}, "metrics": {}}
|
| 36 |
+
}
|
src/lyricloop/data.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import os
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import seaborn as sns
|
| 6 |
+
from .config import RANDOM_STATE, ASSETS_DIR
|
| 7 |
+
|
| 8 |
+
# -------------------------
|
| 9 |
+
# Prompt Construction
|
| 10 |
+
# -------------------------
|
| 11 |
+
|
| 12 |
+
def build_critic_prompt(genre, artist, title, lyrics, max_lyric_length=300):
|
| 13 |
+
"""Constructs the instruction-tuning prompt for the Critic persona."""
|
| 14 |
+
lyrics_snippet = lyrics[:max_lyric_length]
|
| 15 |
+
|
| 16 |
+
instruction = (
|
| 17 |
+
"You are a professional music critic. Provide specific feedback on how to improve "
|
| 18 |
+
"the lyrics based on the genre and artist style. \n"
|
| 19 |
+
"Formatting Rules: \n"
|
| 20 |
+
"1. Use plain text with clear line breaks.\n"
|
| 21 |
+
"2. Ensure all song titles and words have proper spacing."
|
| 22 |
+
)
|
| 23 |
+
|
| 24 |
+
context = (
|
| 25 |
+
f"Target Genre: {genre}\n"
|
| 26 |
+
f"Target Artist: {artist}\n"
|
| 27 |
+
f"Target Title: {title}\n\n"
|
| 28 |
+
f"Lyrics to Evaluate:\n{lyrics_snippet}"
|
| 29 |
+
)
|
| 30 |
+
|
| 31 |
+
return f"<start_of_turn>user\n{instruction}\n\n{context}<end_of_turn>\n<start_of_turn>model\n"
|
| 32 |
+
|
| 33 |
+
def build_revision_prompt(genre, artist, title, draft, critiques):
|
| 34 |
+
"""Constructs the prompt for the 'Revise' step of the refinement loop."""
|
| 35 |
+
instruction = (
|
| 36 |
+
"You are an expert songwriter. Revise the provided lyrics by incorporating "
|
| 37 |
+
"the specific feedback from the critic while maintaining the genre and artist style."
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
context = (
|
| 41 |
+
f"Genre: {genre}\n"
|
| 42 |
+
f"Artist Style: {artist}\n"
|
| 43 |
+
f"Title: {title}\n\n"
|
| 44 |
+
f"Current Draft:\n{draft}\n\n"
|
| 45 |
+
f"Critic Feedback:\n{critiques}"
|
| 46 |
+
)
|
| 47 |
+
|
| 48 |
+
return f"<start_of_turn>user\n{instruction}\n\n{context}<end_of_turn>\n<start_of_turn>model\n"
|
| 49 |
+
|
| 50 |
+
def build_inference_prompt(genre, artist, title):
|
| 51 |
+
"""Reconstructs the prompt format used during v1.0 training."""
|
| 52 |
+
instruction = "Generate lyrics for a song based on these details."
|
| 53 |
+
input_context = f"Genre: {genre}\nArtist: {artist}\nTitle: {title}"
|
| 54 |
+
|
| 55 |
+
return (
|
| 56 |
+
f"<start_of_turn>user\n{instruction}\n\n{input_context}<end_of_turn>\n"
|
| 57 |
+
f"<start_of_turn>model\n"
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
def format_prompt(row):
|
| 61 |
+
"""Converts a dataframe row into a structured Gemma control-token prompt."""
|
| 62 |
+
instruction = "Generate lyrics for a song based on these details."
|
| 63 |
+
input_context = f"Genre: {row['tag']}\nArtist: {row['artist']}\nTitle: {row['title']}"
|
| 64 |
+
response = row['lyrics']
|
| 65 |
+
|
| 66 |
+
return (
|
| 67 |
+
f"<start_of_turn>user\n{instruction}\n\n{input_context}<end_of_turn>\n"
|
| 68 |
+
f"<start_of_turn>model\n{response}<end_of_turn>"
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
# -------------------------
|
| 72 |
+
# Text Processing
|
| 73 |
+
# -------------------------
|
| 74 |
+
|
| 75 |
+
def format_lyrics(text):
|
| 76 |
+
"""Cleans up raw model output by enforcing structural newlines and spacing."""
|
| 77 |
+
# Add double newlines before section headers like [Verse], [Chorus]
|
| 78 |
+
text = re.sub(r'(\[.*?\])', r'\n\n\1\n', text)
|
| 79 |
+
|
| 80 |
+
# Add a newline when a capital letter follows a lowercase letter immediately
|
| 81 |
+
text = re.sub(r'([a-z])([A-Z])', r'\1\n\2', text)
|
| 82 |
+
return text.strip()
|
| 83 |
+
|
| 84 |
+
# -------------------------
|
| 85 |
+
# Dataset Management
|
| 86 |
+
# -------------------------
|
| 87 |
+
|
| 88 |
+
def format_critic_training_row(row):
|
| 89 |
+
"""Standardizes raw rows into the Critic instruction-tuning format."""
|
| 90 |
+
prompt = build_critic_prompt(row.tag, row.artist, row.title, row.lyrics)
|
| 91 |
+
|
| 92 |
+
target_output = (
|
| 93 |
+
f"Genre Fit: The {row.tag} style is well-maintained.\n"
|
| 94 |
+
f"Artist Style: Matches the {row.artist} aesthetic.\n"
|
| 95 |
+
f"Improvements: Consider refining the rhythmic flow in the second verse."
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
return f"{prompt}{target_output}<eos>"
|
| 99 |
+
|
| 100 |
+
def prepare_lyric_dataset(lyrics_filename, reviews_filename, songs_per_genre=200):
|
| 101 |
+
"""Loads, cleans, and balances the dataset while exporting EDA plots."""
|
| 102 |
+
from .viz import save_figure
|
| 103 |
+
|
| 104 |
+
lyrics_path = os.path.join("data", lyrics_filename)
|
| 105 |
+
reviews_path = os.path.join("data", reviews_filename)
|
| 106 |
+
|
| 107 |
+
print(f"Loading & Cleaning Raw Data...")
|
| 108 |
+
|
| 109 |
+
lyrics_df = pd.read_csv(lyrics_path, on_bad_lines='skip')
|
| 110 |
+
reviews_df = pd.read_csv(reviews_path)
|
| 111 |
+
|
| 112 |
+
lyrics_df = lyrics_df.dropna(subset=['lyrics', 'artist', 'tag'])
|
| 113 |
+
reviews_df = reviews_df.dropna(subset=['genre', 'artist'])
|
| 114 |
+
|
| 115 |
+
lyrics_clean = lyrics_df.drop_duplicates(subset="artist")[["artist", "lyrics", "title", "tag"]]
|
| 116 |
+
merged_df = reviews_df.merge(lyrics_clean, on="artist", how="left").dropna(subset=["lyrics", "tag"])
|
| 117 |
+
|
| 118 |
+
# --- Plot 1: Raw Distribution ("Before") ---
|
| 119 |
+
plt.figure(figsize=(10, 5))
|
| 120 |
+
top_raw = merged_df['tag'].value_counts().nlargest(10)
|
| 121 |
+
sns.barplot(x=top_raw.values, y=top_raw.index, hue=top_raw.index, palette='viridis', legend=False)
|
| 122 |
+
plt.title(f"Raw Genre Distribution (n={len(merged_df):,})")
|
| 123 |
+
save_figure("eda_1_raw_distribution.png")
|
| 124 |
+
|
| 125 |
+
# Class balancing logic
|
| 126 |
+
balanced_df = merged_df.groupby("tag", group_keys=False).apply(
|
| 127 |
+
lambda x: x.sample(min(len(x), songs_per_genre), random_state=RANDOM_STATE)
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
# --- Plot 2: Balanced Distribution ("After") ---
|
| 131 |
+
plt.figure(figsize=(10, 5))
|
| 132 |
+
sns.countplot(data=balanced_df, y='tag', hue='tag', palette='magma', legend=False)
|
| 133 |
+
plt.title(f"Balanced Genre Distribution (n={len(balanced_df):,})")
|
| 134 |
+
save_figure("eda_2_balanced_distribution.png")
|
| 135 |
+
|
| 136 |
+
return balanced_df
|
src/lyricloop/environment.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
# -------------------------
|
| 6 |
+
# Replicability Logic
|
| 7 |
+
# -------------------------
|
| 8 |
+
|
| 9 |
+
def set_seed(seed=42):
|
| 10 |
+
"""
|
| 11 |
+
Sets universal random seeds to ensure deterministic results
|
| 12 |
+
across Python, NumPy, and PyTorch.
|
| 13 |
+
"""
|
| 14 |
+
# Python
|
| 15 |
+
random.seed(seed)
|
| 16 |
+
|
| 17 |
+
# NumPy
|
| 18 |
+
np.random.seed(seed)
|
| 19 |
+
|
| 20 |
+
# PyTorch
|
| 21 |
+
torch.manual_seed(seed)
|
| 22 |
+
torch.cuda.manual_seed_all(seed)
|
| 23 |
+
|
| 24 |
+
# Force deterministic algorithms to ensure GPU calculates the exact same gradients every time
|
| 25 |
+
torch.backends.cudnn.deterministic = True
|
| 26 |
+
torch.backends.cudnn.benchmark = False
|
| 27 |
+
|
| 28 |
+
print(f"Random Seed Set to: {seed}")
|
| 29 |
+
|
| 30 |
+
# -------------------------
|
| 31 |
+
# Hardware Diagnostics
|
| 32 |
+
# -------------------------
|
| 33 |
+
|
| 34 |
+
def get_device_capability():
|
| 35 |
+
"""
|
| 36 |
+
Diagnostics to ensure the GPU is ready for LLM Fine-Tuning.
|
| 37 |
+
Enables TF32 for newer NVIDIA architectures (L4).
|
| 38 |
+
"""
|
| 39 |
+
# Enable TF32 for modern Tensor Cores
|
| 40 |
+
torch.backends.cuda.matmul.allow_tf32 = True
|
| 41 |
+
torch.backends.cudnn.allow_tf32 = True
|
| 42 |
+
|
| 43 |
+
if not torch.cuda.is_available():
|
| 44 |
+
raise RuntimeError("No GPU found! Go to Runtime > Change runtime type > Select NVIDIA L4.")
|
| 45 |
+
|
| 46 |
+
device = torch.device('cuda')
|
| 47 |
+
|
| 48 |
+
# Extract GPU Metadata
|
| 49 |
+
gpu_name = torch.cuda.get_device_name(0)
|
| 50 |
+
gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1e9
|
| 51 |
+
capability = torch.cuda.get_device_capability(0)
|
| 52 |
+
bf16_support = torch.cuda.is_bf16_supported()
|
| 53 |
+
|
| 54 |
+
# Print Status Report
|
| 55 |
+
print(f"GPU Detected: {gpu_name}")
|
| 56 |
+
print(f" |-- Memory: {gpu_mem:.2f} GB")
|
| 57 |
+
print(f" |-- Compute Capability: {capability}")
|
| 58 |
+
print(f" |-- BFloat16 Support: {'Yes' if bf16_support else 'No'}")
|
| 59 |
+
|
| 60 |
+
# Professional Warning System
|
| 61 |
+
if "L4" not in gpu_name and "A100" not in gpu_name:
|
| 62 |
+
print(f"\nWarning: Using {gpu_name}. Performance may be suboptimal for Gemma fine-tuning.")
|
| 63 |
+
|
| 64 |
+
return device
|
src/lyricloop/metrics.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import numpy as np
|
| 3 |
+
|
| 4 |
+
# -------------------------
|
| 5 |
+
# Generation Engines
|
| 6 |
+
# -------------------------
|
| 7 |
+
|
| 8 |
+
def execute_generation(model, tokenizer, prompt, max_tokens=300, temperature=0.85, do_sample=False):
|
| 9 |
+
"""
|
| 10 |
+
A universal engine that handles GPU movement, sampling, and decoding.
|
| 11 |
+
The do_sample=False default is ideal for objective Critic tasks.
|
| 12 |
+
"""
|
| 13 |
+
model.eval()
|
| 14 |
+
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 15 |
+
|
| 16 |
+
with torch.no_grad():
|
| 17 |
+
outputs = model.generate(
|
| 18 |
+
**inputs,
|
| 19 |
+
max_new_tokens=max_tokens,
|
| 20 |
+
no_repeat_ngram_size=3,
|
| 21 |
+
do_sample=do_sample,
|
| 22 |
+
temperature=temperature if do_sample else None,
|
| 23 |
+
repetition_penalty=1.2,
|
| 24 |
+
pad_token_id=tokenizer.pad_token_id,
|
| 25 |
+
eos_token_id=tokenizer.eos_token_id
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
input_length = inputs.input_ids.shape[1]
|
| 29 |
+
generated_text = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)
|
| 30 |
+
|
| 31 |
+
return generated_text.strip()
|
| 32 |
+
|
| 33 |
+
def get_token_confidences(model, tokenizer, prompt, max_tokens=50):
|
| 34 |
+
"""
|
| 35 |
+
Generates text and returns a list of (token, confidence_score) tuples.
|
| 36 |
+
Used for creating confidence heatmaps in the UI.
|
| 37 |
+
"""
|
| 38 |
+
model.eval()
|
| 39 |
+
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
| 40 |
+
|
| 41 |
+
with torch.no_grad():
|
| 42 |
+
outputs = model.generate(
|
| 43 |
+
**inputs,
|
| 44 |
+
max_new_tokens=max_tokens,
|
| 45 |
+
return_dict_in_generate=True,
|
| 46 |
+
output_scores=True,
|
| 47 |
+
do_sample=True,
|
| 48 |
+
temperature=0.8,
|
| 49 |
+
pad_token_id=tokenizer.pad_token_id
|
| 50 |
+
)
|
| 51 |
+
|
| 52 |
+
input_len = inputs.input_ids.shape[1]
|
| 53 |
+
gen_ids = outputs.sequences[0][input_len:]
|
| 54 |
+
|
| 55 |
+
# Calculate softmax probabilities for each generated token
|
| 56 |
+
probs = [torch.softmax(score, dim=-1)[0, tid].item() for tid, score in zip(gen_ids, outputs.scores)]
|
| 57 |
+
tokens = [tokenizer.decode(tid) for tid in gen_ids]
|
| 58 |
+
|
| 59 |
+
return list(zip(tokens, probs))
|
| 60 |
+
|
| 61 |
+
# -------------------------
|
| 62 |
+
# Evaluation Metrics
|
| 63 |
+
# -------------------------
|
| 64 |
+
|
| 65 |
+
def calculate_perplexity(model, tokenizer, text):
|
| 66 |
+
"""
|
| 67 |
+
Computes the perplexity (uncertainty) of the model for a specific text sequence.
|
| 68 |
+
Lower score = the model finds the text natural/predictable.
|
| 69 |
+
Higher score = the model finds the text confusing/alien.
|
| 70 |
+
"""
|
| 71 |
+
model.eval()
|
| 72 |
+
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
| 73 |
+
|
| 74 |
+
with torch.no_grad():
|
| 75 |
+
outputs = model(inputs.input_ids, labels=inputs.input_ids)
|
| 76 |
+
loss = outputs.loss
|
| 77 |
+
|
| 78 |
+
# Perplexity is mathematically the exponential of the cross-entropy loss
|
| 79 |
+
return torch.exp(loss).item()
|
| 80 |
+
|
| 81 |
+
# -------------------------
|
| 82 |
+
# Trainer Log Parsers
|
| 83 |
+
# -------------------------
|
| 84 |
+
|
| 85 |
+
def extract_trainer_metrics(model_trainer):
|
| 86 |
+
"""
|
| 87 |
+
Universal log parser for Hugging Face Trainer.
|
| 88 |
+
Extracts step-by-step history for plotting and final validation.
|
| 89 |
+
"""
|
| 90 |
+
logs = model_trainer.state.log_history
|
| 91 |
+
|
| 92 |
+
# Extract coordinates for plotting (Training vs Evaluation)
|
| 93 |
+
train_metrics = [{"step": x["step"], "loss": x["loss"]} for x in logs if "loss" in x]
|
| 94 |
+
eval_metrics = [{"step": x["step"], "loss": x["eval_loss"]} for x in logs if "eval_loss" in x]
|
| 95 |
+
|
| 96 |
+
final_loss = eval_metrics[-1]["loss"] if eval_metrics else None
|
| 97 |
+
|
| 98 |
+
return {
|
| 99 |
+
"train_steps": [x["step"] for x in train_metrics],
|
| 100 |
+
"train_loss": [x["loss"] for x in train_metrics],
|
| 101 |
+
"eval_steps": [x["step"] for x in eval_metrics],
|
| 102 |
+
"eval_loss": [x["loss"] for x in eval_metrics],
|
| 103 |
+
"val_loss": final_loss,
|
| 104 |
+
"perplexity": np.exp(final_loss) if final_loss else None
|
| 105 |
+
}
|
src/lyricloop/viz.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import numpy as np
|
| 3 |
+
import matplotlib.pyplot as plt
|
| 4 |
+
import seaborn as sns
|
| 5 |
+
from .config import ASSETS_DIR
|
| 6 |
+
|
| 7 |
+
# -------------------------
|
| 8 |
+
# Visualization Utilities
|
| 9 |
+
# -------------------------
|
| 10 |
+
|
| 11 |
+
def save_figure(filename):
|
| 12 |
+
"""
|
| 13 |
+
Saves the current matplotlib figure with consistent professional settings.
|
| 14 |
+
Saves to the global assets directory with 300 DPI resolution.
|
| 15 |
+
"""
|
| 16 |
+
path = os.path.join(ASSETS_DIR, filename)
|
| 17 |
+
|
| 18 |
+
# Ensure layout does not clip labels
|
| 19 |
+
plt.tight_layout()
|
| 20 |
+
|
| 21 |
+
# High resolution for documentation and reports
|
| 22 |
+
plt.savefig(path, dpi=300, bbox_inches='tight')
|
| 23 |
+
print(f" Artifact Saved: {path}")
|
| 24 |
+
|
| 25 |
+
plt.show()
|
| 26 |
+
plt.close()
|
| 27 |
+
|
| 28 |
+
# -------------------------
|
| 29 |
+
# Training Diagnostics
|
| 30 |
+
# -------------------------
|
| 31 |
+
|
| 32 |
+
def plot_learning_curves(metrics, version="v1"):
|
| 33 |
+
"""
|
| 34 |
+
Standardized learning curve plotter for loss and validation metrics.
|
| 35 |
+
"""
|
| 36 |
+
sns.set_style("whitegrid")
|
| 37 |
+
plt.figure(figsize=(12, 6))
|
| 38 |
+
|
| 39 |
+
# Training Loss
|
| 40 |
+
sns.lineplot(x=metrics["train_steps"], y=metrics["train_loss"],
|
| 41 |
+
label='Training Loss', color='#4E79A7', linewidth=2.5)
|
| 42 |
+
|
| 43 |
+
# Validation Loss (if available)
|
| 44 |
+
if metrics["eval_loss"]:
|
| 45 |
+
sns.lineplot(x=metrics["eval_steps"], y=metrics["eval_loss"],
|
| 46 |
+
label='Validation Loss', color='#E15759', linewidth=2.5, marker='o')
|
| 47 |
+
|
| 48 |
+
plt.title(f'Learning Curve: LyricLoop {version.upper()}', fontsize=16, fontweight='bold', pad=15)
|
| 49 |
+
plt.xlabel('Training Steps')
|
| 50 |
+
plt.ylabel('Loss')
|
| 51 |
+
plt.legend(frameon=True, fancybox=True, framealpha=0.9)
|
| 52 |
+
|
| 53 |
+
save_figure(f"eval_loss_curve_{version}.png")
|
| 54 |
+
|
| 55 |
+
# -------------------------
|
| 56 |
+
# Confidence & Interpretability
|
| 57 |
+
# -------------------------
|
| 58 |
+
|
| 59 |
+
def plot_token_heatmap(token_conf_pairs, title="Confidence Heatmap", filename="heatmap.png"):
|
| 60 |
+
"""Draws a text heatmap where background color represents model confidence."""
|
| 61 |
+
fig = plt.figure(figsize=(10, 4))
|
| 62 |
+
ax = fig.add_axes([0, 0, 1, 1])
|
| 63 |
+
ax.axis('off')
|
| 64 |
+
|
| 65 |
+
x, y = 0.02, 0.85
|
| 66 |
+
line_height = 0.12
|
| 67 |
+
confidences = [p[1] for p in token_conf_pairs]
|
| 68 |
+
avg_conf = np.mean(confidences) if confidences else 0
|
| 69 |
+
|
| 70 |
+
ax.text(0.02, 0.95, f"{title} (Avg: {avg_conf:.2%})",
|
| 71 |
+
fontsize=12, fontweight='bold', transform=ax.transAxes)
|
| 72 |
+
|
| 73 |
+
for t, score in token_conf_pairs:
|
| 74 |
+
# Professional Color Scale: Green (High), Orange (Medium), Red (Low)
|
| 75 |
+
if score > 0.7: bg = '#aaffaa'
|
| 76 |
+
elif score > 0.3: bg = '#ffeeba'
|
| 77 |
+
else: bg = '#ffcccc'
|
| 78 |
+
|
| 79 |
+
clean_text = t.replace('\n', '↵ ')
|
| 80 |
+
text_w = len(clean_text) * 0.015
|
| 81 |
+
|
| 82 |
+
if x + text_w > 0.95:
|
| 83 |
+
x = 0.02
|
| 84 |
+
y -= line_height
|
| 85 |
+
|
| 86 |
+
ax.text(x, y, clean_text, bbox=dict(facecolor=bg, edgecolor='none', pad=2, alpha=0.8),
|
| 87 |
+
fontfamily='monospace', fontsize=10, transform=ax.transAxes)
|
| 88 |
+
x += text_w + 0.005
|
| 89 |
+
|
| 90 |
+
save_figure(filename)
|
| 91 |
+
return avg_conf
|
| 92 |
+
|
| 93 |
+
def plot_confidence_summary(genres, scores, title="Confidence Summary", filename="conf_summary.png"):
|
| 94 |
+
"""Standardized bar chart for comparing confidence across genres."""
|
| 95 |
+
plt.figure(figsize=(11, 6))
|
| 96 |
+
x = np.arange(len(genres))
|
| 97 |
+
width = 0.35
|
| 98 |
+
palette = ['#A0A0A0', '#4E79A7', '#E15759'] # grey, blue, red
|
| 99 |
+
|
| 100 |
+
if isinstance(scores, list):
|
| 101 |
+
scores_dict = {"Model Output": scores}
|
| 102 |
+
width = 0.5
|
| 103 |
+
else:
|
| 104 |
+
scores_dict = scores
|
| 105 |
+
|
| 106 |
+
active_scores = {k: v for k, v in scores_dict.items() if len(v) == len(genres)}
|
| 107 |
+
|
| 108 |
+
for i, (label, values) in enumerate(active_scores.items()):
|
| 109 |
+
offset = (i - (len(active_scores)-1)/2) * width if len(active_scores) > 1 else 0
|
| 110 |
+
bars = plt.bar(x + offset, values, width, label=label,
|
| 111 |
+
color=palette[i % 3], edgecolor='black', alpha=0.8)
|
| 112 |
+
|
| 113 |
+
for bar in bars:
|
| 114 |
+
h = bar.get_height()
|
| 115 |
+
plt.text(bar.get_x() + bar.get_width()/2., h + 0.02, f'{h:.2f}',
|
| 116 |
+
ha='center', va='bottom', fontweight='bold', fontsize=9)
|
| 117 |
+
|
| 118 |
+
plt.title(title, fontsize=16, fontweight='bold')
|
| 119 |
+
plt.ylabel('Average Confidence Score')
|
| 120 |
+
plt.xticks(x, genres)
|
| 121 |
+
plt.ylim(0, 1.1)
|
| 122 |
+
if len(active_scores) > 1:
|
| 123 |
+
plt.legend(loc='lower right')
|
| 124 |
+
plt.grid(axis='y', linestyle='--', alpha=0.3)
|
| 125 |
+
save_figure(filename)
|
| 126 |
+
|
| 127 |
+
# -------------------------
|
| 128 |
+
# Performance Comparison
|
| 129 |
+
# -------------------------
|
| 130 |
+
|
| 131 |
+
def plot_perplexity(genres, scores_dict, title="Model Perplexity", filename="perplexity.png", use_log=False):
|
| 132 |
+
"""Global plotter for perplexity scores with support for log-scaling."""
|
| 133 |
+
plt.figure(figsize=(10, 6))
|
| 134 |
+
if use_log: plt.yscale('log')
|
| 135 |
+
|
| 136 |
+
x = np.arange(len(genres))
|
| 137 |
+
comp_colors = ['#A0A0A0', '#4E79A7'] # grey for Baseline, blue for Fine-Tuned
|
| 138 |
+
|
| 139 |
+
if len(scores_dict) == 1:
|
| 140 |
+
label = list(scores_dict.keys())[0]
|
| 141 |
+
values = list(scores_dict.values())[0]
|
| 142 |
+
bars = plt.bar(genres, values, color='#A0A0A0', edgecolor='black', alpha=0.8)
|
| 143 |
+
else:
|
| 144 |
+
width = 0.35
|
| 145 |
+
for i, (label, values) in enumerate(scores_dict.items()):
|
| 146 |
+
offset = (i - (len(scores_dict)-1)/2) * width
|
| 147 |
+
bars = plt.bar(x + offset, values, width, label=label, color=comp_colors[i % 2], edgecolor='black')
|
| 148 |
+
|
| 149 |
+
plt.title(title, fontsize=14, fontweight='bold')
|
| 150 |
+
plt.ylabel('Perplexity (Lower is Better)', fontsize=12)
|
| 151 |
+
plt.xticks(x, genres)
|
| 152 |
+
plt.grid(axis='y', linestyle='--', alpha=0.5)
|
| 153 |
+
if len(scores_dict) > 1: plt.legend()
|
| 154 |
+
|
| 155 |
+
save_figure(filename)
|