Upload folder using huggingface_hub
Browse files- app.py +12 -2
- config/settings.py +1 -1
- src/generator/generator.py +91 -79
app.py
CHANGED
|
@@ -10,10 +10,19 @@ if os.getenv('DEPLOYMENT_MODE') == 'huggingface':
|
|
| 10 |
else:
|
| 11 |
os.environ['DYLD_LIBRARY_PATH'] = '/usr/local/opt/sqlite/lib'
|
| 12 |
|
|
|
|
|
|
|
| 13 |
from src.generator.generator import LyricGenerator
|
| 14 |
from config.settings import Settings
|
| 15 |
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
def main():
|
| 18 |
"""Main application function"""
|
| 19 |
st.set_page_config(
|
|
@@ -23,6 +32,7 @@ def main():
|
|
| 23 |
)
|
| 24 |
|
| 25 |
st.title("SongLift LyrGen2")
|
|
|
|
| 26 |
|
| 27 |
# Only run startup once per session
|
| 28 |
if 'initialized' not in st.session_state:
|
|
@@ -57,7 +67,7 @@ def main():
|
|
| 57 |
with st.chat_message("user"):
|
| 58 |
st.write(user_msg)
|
| 59 |
with st.chat_message("assistant"):
|
| 60 |
-
st.markdown(
|
| 61 |
|
| 62 |
# Chat interface
|
| 63 |
user_input = st.chat_input("Enter your prompt (ask for new lyrics or modify existing ones)...")
|
|
@@ -76,7 +86,7 @@ def main():
|
|
| 76 |
|
| 77 |
# Store the response
|
| 78 |
lyrics = response['answer']
|
| 79 |
-
st.markdown(
|
| 80 |
st.session_state.current_lyrics = lyrics
|
| 81 |
|
| 82 |
# Display sources with content
|
|
|
|
| 10 |
else:
|
| 11 |
os.environ['DYLD_LIBRARY_PATH'] = '/usr/local/opt/sqlite/lib'
|
| 12 |
|
| 13 |
+
import re
|
| 14 |
+
|
| 15 |
from src.generator.generator import LyricGenerator
|
| 16 |
from config.settings import Settings
|
| 17 |
|
| 18 |
|
| 19 |
+
def format_lyrics(text: str) -> str:
    """Format generated lyrics for Markdown rendering.

    Bracketed section markers such as ``[Verse 1]`` or ``[Chorus]`` are
    wrapped in Markdown bold markers, and leading/trailing whitespace is
    stripped from the result. Interior whitespace is left untouched.

    Markers that already sit next to ``**`` are skipped, so text in which
    the markers are already bold (for example ``**[Chorus]**``) is not
    turned into ``****[Chorus]****``, and calling the function on its own
    output is a no-op.

    Args:
        text: Raw lyrics text.

    Returns:
        The lyrics with section markers in bold and outer whitespace
        removed.
    """
    # The lookbehind/lookahead reject markers that are already adjacent to
    # bold markers, which keeps the substitution idempotent.
    text = re.sub(r'(?<!\*\*)\[([^\]]+)\](?!\*\*)', r'**[\1]**', text)
    return text.strip()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
def main():
|
| 27 |
"""Main application function"""
|
| 28 |
st.set_page_config(
|
|
|
|
| 32 |
)
|
| 33 |
|
| 34 |
st.title("SongLift LyrGen2")
|
| 35 |
+
st.sidebar.markdown(f"**Model:** {Settings.LLM_MODEL}")
|
| 36 |
|
| 37 |
# Only run startup once per session
|
| 38 |
if 'initialized' not in st.session_state:
|
|
|
|
| 67 |
with st.chat_message("user"):
|
| 68 |
st.write(user_msg)
|
| 69 |
with st.chat_message("assistant"):
|
| 70 |
+
st.markdown(format_lyrics(assistant_msg))
|
| 71 |
|
| 72 |
# Chat interface
|
| 73 |
user_input = st.chat_input("Enter your prompt (ask for new lyrics or modify existing ones)...")
|
|
|
|
| 86 |
|
| 87 |
# Store the response
|
| 88 |
lyrics = response['answer']
|
| 89 |
+
st.markdown(format_lyrics(lyrics))
|
| 90 |
st.session_state.current_lyrics = lyrics
|
| 91 |
|
| 92 |
# Display sources with content
|
config/settings.py
CHANGED
|
@@ -26,7 +26,7 @@ class Settings:
|
|
| 26 |
|
| 27 |
# Model Settings
|
| 28 |
EMBEDDING_MODEL = "text-embedding-ada-002"
|
| 29 |
-
LLM_MODEL = "gpt-
|
| 30 |
|
| 31 |
# ChromaDB Settings
|
| 32 |
CHROMA_COLLECTION_NAME = "lyrics_v1"
|
|
|
|
| 26 |
|
| 27 |
# Model Settings
|
| 28 |
EMBEDDING_MODEL = "text-embedding-ada-002"
|
| 29 |
+
LLM_MODEL = os.getenv("LLM_MODEL", "gpt-5.2")
|
| 30 |
|
| 31 |
# ChromaDB Settings
|
| 32 |
CHROMA_COLLECTION_NAME = "lyrics_v1"
|
src/generator/generator.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
from typing import Dict, List, Optional
|
| 2 |
from pathlib import Path
|
|
|
|
| 3 |
import shutil
|
| 4 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
| 5 |
from langchain_chroma import Chroma
|
| 6 |
from langchain.chains import ConversationalRetrievalChain
|
| 7 |
from langchain.prompts import PromptTemplate
|
|
|
|
|
|
|
|
|
|
| 8 |
from huggingface_hub import snapshot_download, hf_hub_download, HfApi
|
| 9 |
from config.settings import Settings
|
| 10 |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
@@ -13,6 +17,37 @@ import sqlite3
|
|
| 13 |
from openai import APIConnectionError, RateLimitError
|
| 14 |
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
class LyricGenerator:
|
| 17 |
def __init__(self):
|
| 18 |
"""Initialize the generator with embeddings"""
|
|
@@ -189,9 +224,12 @@ class LyricGenerator:
|
|
| 189 |
|
| 190 |
def _setup_qa_chain(self) -> None:
|
| 191 |
"""Initialize the QA chain for generating lyrics"""
|
| 192 |
-
# Configure retriever
|
| 193 |
-
retriever =
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
| 195 |
)
|
| 196 |
|
| 197 |
# Create document prompt
|
|
@@ -201,76 +239,50 @@ class LyricGenerator:
|
|
| 201 |
)
|
| 202 |
|
| 203 |
# System prompt template
|
| 204 |
-
system_template = """You are
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
- Blends inspiration with innovation
|
| 249 |
-
- Stays current and authentic
|
| 250 |
-
- Avoids dated or overdone tropes
|
| 251 |
-
|
| 252 |
-
3. Always ensure:
|
| 253 |
-
- Technical excellence in rhythm and flow
|
| 254 |
-
- Proper section marking [Verse/Chorus/etc]
|
| 255 |
-
- Emotional resonance and authenticity
|
| 256 |
-
- Modern, fresh perspective
|
| 257 |
-
- Strategic use of space and silence
|
| 258 |
-
|
| 259 |
-
Previous Chat History:
|
| 260 |
-
{chat_history}
|
| 261 |
-
|
| 262 |
-
Retrieved Examples for Analysis:
|
| 263 |
-
{context}
|
| 264 |
-
|
| 265 |
-
Response Format:
|
| 266 |
-
1. Brief analysis of relevant patterns from retrieved examples
|
| 267 |
-
2. Generated lyrics with clear section marking
|
| 268 |
-
3. Explanation of how you adapted specific elements from the sources
|
| 269 |
-
|
| 270 |
-
User Request: {question}
|
| 271 |
-
|
| 272 |
-
Response: Let me analyze the retrieved lyrics and craft something that
|
| 273 |
-
combines their strongest elements with professional songwriting principles."""
|
| 274 |
|
| 275 |
prompt = PromptTemplate(
|
| 276 |
input_variables=["context", "chat_history", "question"],
|
|
@@ -279,12 +291,12 @@ class LyricGenerator:
|
|
| 279 |
|
| 280 |
# Initialize language model
|
| 281 |
llm = ChatOpenAI(
|
| 282 |
-
temperature=0.
|
| 283 |
-
model_name=
|
| 284 |
-
max_tokens=
|
| 285 |
-
top_p=0.
|
| 286 |
-
presence_penalty=0.
|
| 287 |
-
frequency_penalty=0.
|
| 288 |
)
|
| 289 |
|
| 290 |
# Create QA chain
|
|
|
|
| 1 |
from typing import Dict, List, Optional
|
| 2 |
from pathlib import Path
|
| 3 |
+
from collections import defaultdict
|
| 4 |
import shutil
|
| 5 |
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
|
| 6 |
from langchain_chroma import Chroma
|
| 7 |
from langchain.chains import ConversationalRetrievalChain
|
| 8 |
from langchain.prompts import PromptTemplate
|
| 9 |
+
from langchain_core.callbacks import CallbackManagerForRetrieverRun
|
| 10 |
+
from langchain_core.documents import Document
|
| 11 |
+
from langchain_core.retrievers import BaseRetriever
|
| 12 |
from huggingface_hub import snapshot_download, hf_hub_download, HfApi
|
| 13 |
from config.settings import Settings
|
| 14 |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
|
|
|
| 17 |
from openai import APIConnectionError, RateLimitError
|
| 18 |
|
| 19 |
|
| 20 |
+
class DiverseRetriever(BaseRetriever):
    """Retriever that limits how many chunks any one artist can contribute.

    It asks the vector store for ``fetch_k`` scored matches for the query,
    then walks them in the order returned, keeping a chunk only while its
    artist has fewer than ``max_per_artist`` chunks kept so far. The walk
    stops once ``final_k`` chunks have been kept.
    """

    # Vector store that is queried for candidate chunks.
    vector_store: Chroma
    # Number of candidates requested from the vector store.
    fetch_k: int = 60
    # Upper bound on kept chunks sharing the same "artist" metadata value.
    max_per_artist: int = 3
    # Number of kept chunks at which the walk over candidates stops.
    final_k: int = 20

    class Config:
        arbitrary_types_allowed = True

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return candidate chunks for ``query`` with a per-artist cap applied.

        Args:
            query: Text passed to the vector store similarity search.
            run_manager: Callback manager for this retriever run; it is not
                used by this implementation.

        Returns:
            The kept documents, in the order the vector store returned them.
        """
        candidates = self.vector_store.similarity_search_with_score(
            query, k=self.fetch_k
        )

        per_artist: dict = defaultdict(int)
        selected: List[Document] = []

        # Scores are not needed here; only the returned order is used.
        for candidate, _ in candidates:
            # Chunks without an "artist" key share the "unknown" bucket.
            name = candidate.metadata.get("artist", "unknown")
            under_cap = per_artist[name] < self.max_per_artist
            if under_cap:
                selected.append(candidate)
                per_artist[name] += 1
            if len(selected) >= self.final_k:
                break

        unique_artists = len(per_artist)
        print(f"DiverseRetriever: {len(selected)} chunks from {unique_artists} artists")
        return selected
|
| 49 |
+
|
| 50 |
+
|
| 51 |
class LyricGenerator:
|
| 52 |
def __init__(self):
|
| 53 |
"""Initialize the generator with embeddings"""
|
|
|
|
| 224 |
|
| 225 |
def _setup_qa_chain(self) -> None:
|
| 226 |
"""Initialize the QA chain for generating lyrics"""
|
| 227 |
+
# Configure diverse retriever: fetch 60, cap 3 per artist, return top 20
|
| 228 |
+
retriever = DiverseRetriever(
|
| 229 |
+
vector_store=self.vector_store,
|
| 230 |
+
fetch_k=60,
|
| 231 |
+
max_per_artist=3,
|
| 232 |
+
final_k=20,
|
| 233 |
)
|
| 234 |
|
| 235 |
# Create document prompt
|
|
|
|
| 239 |
)
|
| 240 |
|
| 241 |
# System prompt template
|
| 242 |
+
system_template = """You are a professional songwriter. Your ONLY output is lyrics with section markers. No analysis. No explanation. No commentary. No source references. Nothing before the lyrics. Nothing after the lyrics.
|
| 243 |
+
|
| 244 |
+
OUTPUT FORMAT:
|
| 245 |
+
[Section Name]
|
| 246 |
+
lyrics here
|
| 247 |
+
|
| 248 |
+
[Next Section]
|
| 249 |
+
lyrics here
|
| 250 |
+
|
| 251 |
+
That is it. Section markers in brackets, lyrics below each one. Nothing else.
|
| 252 |
+
|
| 253 |
+
BANNED WORDS — never use any of these:
|
| 254 |
+
neon, algorithm, digital, phantom, pixel, shadow, reflection, concrete jungle, echo chamber, midnight, cypher, whisper, canvas, tapestry, labyrinth, mosaic, symphony, aurora, ethereal, cosmic, celestial, visceral, transcend, paradigm, ultrapixel, emotional phantom
|
| 255 |
+
|
| 256 |
+
SPECIFICITY RULES — every line must follow these:
|
| 257 |
+
1. SCENES over concepts — put the listener in a specific place with objects they can see
|
| 258 |
+
2. OBJECTS over adjectives — name the actual thing (a dented Ford Ranger, not "a broken vehicle")
|
| 259 |
+
3. CONSEQUENCES over metaphors — show what happened, not what it was like
|
| 260 |
+
4. TEMPORAL SPECIFICS — Tuesday, 3 AM, late August, second semester, not "one day" or "sometimes"
|
| 261 |
+
5. DOMESTIC DETAIL — kitchen tables, screen doors, parking lots, unwashed mugs, not abstract spaces
|
| 262 |
+
6. GUT-PUNCH MOMENTS — one line per section that lands like a physical sensation
|
| 263 |
+
7. EMOTIONAL SHIFTS — each section should feel different from the last (angry→tender, numb→raw)
|
| 264 |
+
|
| 265 |
+
BAD (abstract LLM output):
|
| 266 |
+
"In the shadows of my mind I wander through the echoes
|
| 267 |
+
Searching for a light that fades like whispers in the wind
|
| 268 |
+
The tapestry of memories unravels at the seams
|
| 269 |
+
As I transcend the boundaries of what we could have been"
|
| 270 |
+
|
| 271 |
+
GOOD (specific, lived-detail writing):
|
| 272 |
+
"Your coffee mug's still on the counter, Wednesday morning light
|
| 273 |
+
I keep stepping over boxes I packed three weeks ago
|
| 274 |
+
The landlord needs an answer and my sister needs a ride
|
| 275 |
+
But I'm just sitting on the kitchen floor in yesterday's clothes"
|
| 276 |
+
|
| 277 |
+
The GOOD example works because: specific mug, specific day, specific floor, specific detail about boxes with a time frame, real obligations pulling at the narrator. Every line is a scene you can photograph.
|
| 278 |
+
|
| 279 |
+
Previous Chat History:
|
| 280 |
+
{chat_history}
|
| 281 |
+
|
| 282 |
+
Reference lyrics — study their rhythm, rhyme schemes, flow, tone, and the kinds of details they use. Draw inspiration from their emotional register and imagery approach, but write original lines. Do not copy phrases directly:
|
| 283 |
+
{context}
|
| 284 |
+
|
| 285 |
+
User Request: {question}"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 286 |
|
| 287 |
prompt = PromptTemplate(
|
| 288 |
input_variables=["context", "chat_history", "question"],
|
|
|
|
| 291 |
|
| 292 |
# Initialize language model
|
| 293 |
llm = ChatOpenAI(
|
| 294 |
+
temperature=0.95,
|
| 295 |
+
model_name=Settings.LLM_MODEL,
|
| 296 |
+
max_tokens=2000,
|
| 297 |
+
top_p=0.9,
|
| 298 |
+
presence_penalty=0.25,
|
| 299 |
+
frequency_penalty=0.2
|
| 300 |
)
|
| 301 |
|
| 302 |
# Create QA chain
|